diff --git a/.vscode/launch.json b/.vscode/launch.json index e2a19cfd..8c151dc2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,13 +6,17 @@ "request": "launch", "name": "Debug operator binary", "cargo": { - "args": ["build"], + "args": [ + "build" + ], "filter": { "name": "stackable-{[ operator.name }]", "kind": "bin" } }, - "args": ["run"], + "args": [ + "run" + ], "cwd": "${workspaceFolder}" } ] diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index 52fb205b..b6f80cdd 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -1,3 +1,4 @@ +--- version: 0.1.0 spec: units: @@ -35,6 +36,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for successfully resolved domain names." description: "TTL for successfully resolved domain names." @@ -54,52 +57,18 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." - - property: &zookeeperConnect - propertyNames: - - name: "zookeeper.connect" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - unit: *unitUrl - defaultValues: - - fromVersion: "0.0.0" - value: "localhost:2181" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "The zookeeper connection string" - - - property: &zookeeperTimeout - propertyNames: - - name: "zookeeper.connection.timeout.ms" - kind: - type: "file" - file: "server.properties" - datatype: - type: "integer" - unit: *unitMilliseconds - defaultValues: - - fromVersion: "0.0.0" - value: "18000" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "Zookeeper connection timeout in milliseconds." 
- - property: &opaAuthorizerClassName propertyNames: - name: "authorizer.class.name" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" defaultValues: @@ -118,7 +87,7 @@ properties: - name: "opa.authorizer.url" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" unit: *unitUrl @@ -133,7 +102,7 @@ properties: - name: "opa.authorizer.cache.initial.capacity" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -151,7 +120,7 @@ properties: - name: "opa.authorizer.cache.maximum.size" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -169,7 +138,7 @@ properties: - name: "opa.authorizer.cache.expire.after.seconds" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -181,19 +150,3 @@ properties: required: false asOfVersion: "0.0.0" description: "The number of seconds after which the OPA authorizer cache expires" - - - property: &logDirs - propertyNames: - - name: "log.dirs" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - recommendedValues: - - value: "/stackable/data/topicdata" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "A comma separated list of directories under which to store log files" diff --git a/deploy/helm/kafka-operator/configs/properties.yaml b/deploy/helm/kafka-operator/configs/properties.yaml index 52fb205b..b6f80cdd 100644 --- a/deploy/helm/kafka-operator/configs/properties.yaml +++ b/deploy/helm/kafka-operator/configs/properties.yaml @@ -1,3 +1,4 @@ +--- version: 0.1.0 spec: units: @@ -35,6 +36,8 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for successfully resolved domain names." description: "TTL for successfully resolved domain names." @@ -54,52 +57,18 @@ properties: roles: - name: "broker" required: true + - name: "controller" + required: true asOfVersion: "0.0.0" comment: "TTL for domain names that cannot be resolved." description: "TTL for domain names that cannot be resolved." - - property: &zookeeperConnect - propertyNames: - - name: "zookeeper.connect" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - unit: *unitUrl - defaultValues: - - fromVersion: "0.0.0" - value: "localhost:2181" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "The zookeeper connection string" - - - property: &zookeeperTimeout - propertyNames: - - name: "zookeeper.connection.timeout.ms" - kind: - type: "file" - file: "server.properties" - datatype: - type: "integer" - unit: *unitMilliseconds - defaultValues: - - fromVersion: "0.0.0" - value: "18000" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "Zookeeper connection timeout in milliseconds." 
- - property: &opaAuthorizerClassName propertyNames: - name: "authorizer.class.name" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" defaultValues: @@ -118,7 +87,7 @@ properties: - name: "opa.authorizer.url" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "string" unit: *unitUrl @@ -133,7 +102,7 @@ properties: - name: "opa.authorizer.cache.initial.capacity" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -151,7 +120,7 @@ properties: - name: "opa.authorizer.cache.maximum.size" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -169,7 +138,7 @@ properties: - name: "opa.authorizer.cache.expire.after.seconds" kind: type: "file" - file: "server.properties" + file: "broker.properties" datatype: type: "integer" unit: *unitCapacity @@ -181,19 +150,3 @@ properties: required: false asOfVersion: "0.0.0" description: "The number of seconds after which the OPA authorizer cache expires" - - - property: &logDirs - propertyNames: - - name: "log.dirs" - kind: - type: "file" - file: "server.properties" - datatype: - type: "string" - recommendedValues: - - value: "/stackable/data/topicdata" - roles: - - name: "broker" - required: true - asOfVersion: "0.0.0" - description: "A comma separated list of directories under which to store log files" diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 4d695e91..9e03950a 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -609,6 +609,14 @@ spec: - roleGroups type: object clusterConfig: + default: + authentication: [] + authorization: + opa: null + tls: + internalSecretClass: tls + serverSecretClass: tls + zookeeperConfigMapName: null description: |- Kafka settings that affect all roles and role groups. @@ -685,10 +693,9 @@ spec: nullable: true type: string zookeeperConfigMapName: - description: Kafka requires a ZooKeeper cluster connection to run. Provide the name of the ZooKeeper [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) here. When using the [Stackable operator for Apache ZooKeeper](https://docs.stackable.tech/home/nightly/zookeeper/) to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. + description: Provide the name of the ZooKeeper [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) here. When using the [Stackable operator for Apache ZooKeeper](https://docs.stackable.tech/home/nightly/zookeeper/) to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper support has been dropped. Please use the 'controller' role instead. + nullable: true type: string - required: - - zookeeperConfigMapName type: object clusterOperation: default: @@ -705,6 +712,573 @@ spec: description: Flag to stop the cluster. This means all deployed resources (e.g. Services, StatefulSets, ConfigMaps) are kept but all deployed Pods (e.g. replicas from a StatefulSet) are scaled to 0 and therefore stopped and removed. If applied at the same time with `reconciliationPaused`, the latter will pause reconciliation and `stopped` will take no effect until `reconciliationPaused` is set to false or removed.
type: boolean type: object + controllers: + description: This struct represents a role - e.g. HDFS datanodes or Trino workers. It has a key-value-map containing all the roleGroups that are part of this role. Additionally, there is a `config`, which is configurable at the role *and* roleGroup level. Everything at roleGroup level is merged on top of what is configured on role level. There is also a second form of config, which can only be configured at role level, the `roleConfig`. You can learn more about this in the [Roles and role group concept documentation](https://docs.stackable.tech/home/nightly/concepts/roles-and-role-groups). + nullable: true + properties: + cliOverrides: + additionalProperties: + type: string + default: {} + type: object + config: + default: {} + properties: + affinity: + default: + nodeAffinity: null + nodeSelector: null + podAffinity: null + podAntiAffinity: null + description: These configuration settings control [Pod placement](https://docs.stackable.tech/home/nightly/concepts/operations/pod_placement). + properties: + nodeAffinity: + description: Same as the `spec.affinity.nodeAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + nodeSelector: + additionalProperties: + type: string + description: Simple key-value pairs forming a nodeSelector, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + podAffinity: + description: Same as the `spec.affinity.podAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + podAntiAffinity: + description: Same as the `spec.affinity.podAntiAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string + logging: + default: + containers: {} + enableVectorAgent: null + description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). + properties: + containers: + additionalProperties: + anyOf: + - required: + - custom + - {} + description: Log configuration of the container + properties: + console: + description: Configuration for the console appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + custom: + description: Custom log configuration provided in a ConfigMap + properties: + configMap: + description: ConfigMap containing the log configuration files + nullable: true + type: string + type: object + file: + description: Configuration for the file appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. 
+ enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + loggers: + additionalProperties: + description: Configuration of a logger + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + default: {} + description: Configuration per logger + type: object + type: object + description: Log configuration per container. + type: object + enableVectorAgent: + description: Whether or not to deploy a container with the Vector log agent. + nullable: true + type: boolean + type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string + resources: + default: + cpu: + max: null + min: null + memory: + limit: null + runtimeLimits: {} + storage: + logDirs: + capacity: null + description: Resource usage is configured here, this includes CPU usage, memory usage and disk storage usage, if this role needs any. + properties: + cpu: + default: + max: null + min: null + properties: + max: + description: The maximum amount of CPU cores that can be requested by Pods. Equivalent to the `limit` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + min: + description: The minimal amount of CPU cores that Pods need to run. Equivalent to the `request` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + type: object + memory: + properties: + limit: + description: 'The maximum amount of memory that should be available to the Pod. Specified as a byte [Quantity](https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/), which means these suffixes are supported: E, P, T, G, M, k. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. For example, the following represent roughly the same value: `128974848, 129e6, 129M, 128974848000m, 123Mi`' + nullable: true + type: string + runtimeLimits: + description: Additional options that can be specified. + type: object + type: object + storage: + properties: + logDirs: + default: + capacity: null + properties: + capacity: + description: "Quantity is a fixed-point representation of a number. It provides convenient marshaling/unmarshaling in JSON and YAML, in addition to String() and AsInt64() accessors.\n\nThe serialization format is:\n\n``` <quantity> ::= <signedNumber><suffix>\n\n\t(Note that <suffix> may be empty, from the \"\" case in <decimalSI>.)\n\n<digit> ::= 0 | 1 | ... | 9 <digits> ::= <digit> | <digit><digits> <number> ::= <digits> | <digits>.<digits> | <digits>. | .<digits> <sign> ::= \"+\" | \"-\" <signedNumber> ::= <number> | <sign><number> <suffix> ::= <binarySI> | <decimalExponent> | <decimalSI> <binarySI> ::= Ki | Mi | Gi | Ti | Pi | Ei\n\n\t(International System of units; See: http://physics.nist.gov/cuu/Units/binary.html)\n\n<decimalSI> ::= m | \"\" | k | M | G | T | P | E\n\n\t(Note that 1024 = 1Ki but 1000 = 1k; I didn't choose the capitalization.)\n\n<decimalExponent> ::= \"e\" <signedNumber> | \"E\" <signedNumber> ```\n\nNo matter which of the three exponent forms is used, no quantity may represent a number greater than 2^63-1 in magnitude, nor may it have more than 3 decimal places.
Numbers larger or more precise will be capped or rounded up. (E.g.: 0.1m will be rounded up to 1m.) This may be extended in the future if we require larger or smaller quantities.\n\nWhen a Quantity is parsed from a string, it will remember the type of suffix it had, and will use the same type again when it is serialized.\n\nBefore serializing, Quantity will be put in \"canonical form\". This means that Exponent/suffix will be adjusted up or down (with a corresponding increase or decrease in Mantissa) such that:\n\n- No precision is lost - No fractional digits will be emitted - The exponent (or suffix) is as large as possible.\n\nThe sign will be omitted unless the number is negative.\n\nExamples:\n\n- 1.5 will be serialized as \"1500m\" - 1.5Gi will be serialized as \"1536Mi\"\n\nNote that the quantity will NEVER be internally represented by a floating point number. That is the whole point of this exercise.\n\nNon-canonical values will still parse as long as they are well formed, but will be re-emitted in their canonical form. (So always use canonical form, or don't diff.)\n\nThis format is intended to make it difficult to use these numbers without writing some sort of special handling code in the hopes that that will cause implementors to also use a fixed point implementation." + nullable: true + type: string + selectors: + description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. + nullable: true + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + storageClass: + nullable: true + type: string + type: object + type: object + type: object + type: object + configOverrides: + additionalProperties: + additionalProperties: + type: string + type: object + default: {} + description: The `configOverrides` can be used to configure properties in product config files that are not exposed in the CRD. Read the [config overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#config-overrides) and consult the operator specific usage guide documentation for details on the available config files and settings for the specific product.
+ type: object + envOverrides: + additionalProperties: + type: string + default: {} + description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' + type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read up on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. + properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of these regexes will be removed + items: + type: string + type: array + type: object + podOverrides: + default: {} + description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. + type: object + x-kubernetes-preserve-unknown-fields: true + roleConfig: + default: + podDisruptionBudget: + enabled: true + maxUnavailable: null + description: This is a product-agnostic RoleConfig, which is sufficient for most of the products. + properties: + podDisruptionBudget: + default: + enabled: true + maxUnavailable: null + description: |- + This struct is used to configure: + + 1. If PodDisruptionBudgets are created by the operator 2. The allowed number of Pods to be unavailable (`maxUnavailable`) + + Learn more in the [allowed Pod disruptions documentation](https://docs.stackable.tech/home/nightly/concepts/operations/pod_disruptions). + properties: + enabled: + default: true + description: Whether a PodDisruptionBudget should be written out for this role. Disabling this enables you to specify your own - custom - one. Defaults to true. + type: boolean + maxUnavailable: + description: The number of Pods that are allowed to be down because of voluntary disruptions. If you don't explicitly set this, the operator will use a sane default based upon knowledge about the individual product. + format: uint16 + minimum: 0.0 + nullable: true + type: integer + type: object + type: object + roleGroups: + additionalProperties: + properties: + cliOverrides: + additionalProperties: + type: string + default: {} + type: object + config: + default: {} + properties: + affinity: + default: + nodeAffinity: null + nodeSelector: null + podAffinity: null + podAntiAffinity: null + description: These configuration settings control [Pod placement](https://docs.stackable.tech/home/nightly/concepts/operations/pod_placement).
+ properties: + nodeAffinity: + description: Same as the `spec.affinity.nodeAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + nodeSelector: + additionalProperties: + type: string + description: Simple key-value pairs forming a nodeSelector, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + podAffinity: + description: Same as the `spec.affinity.podAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + podAntiAffinity: + description: Same as the `spec.affinity.podAntiAffinity` field on the Pod, see the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string + logging: + default: + containers: {} + enableVectorAgent: null + description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). + properties: + containers: + additionalProperties: + anyOf: + - required: + - custom + - {} + description: Log configuration of the container + properties: + console: + description: Configuration for the console appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + custom: + description: Custom log configuration provided in a ConfigMap + properties: + configMap: + description: ConfigMap containing the log configuration files + nullable: true + type: string + type: object + file: + description: Configuration for the file appender + nullable: true + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + loggers: + additionalProperties: + description: Configuration of a logger + properties: + level: + description: The log level threshold. Log events with a lower log level are discarded. + enum: + - TRACE + - DEBUG + - INFO + - WARN + - ERROR + - FATAL + - NONE + nullable: true + type: string + type: object + default: {} + description: Configuration per logger + type: object + type: object + description: Log configuration per container. + type: object + enableVectorAgent: + description: Whether or not to deploy a container with the Vector log agent. + nullable: true + type: boolean + type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
+ nullable: true + type: string + resources: + default: + cpu: + max: null + min: null + memory: + limit: null + runtimeLimits: {} + storage: + logDirs: + capacity: null + description: Resource usage is configured here, this includes CPU usage, memory usage and disk storage usage, if this role needs any. + properties: + cpu: + default: + max: null + min: null + properties: + max: + description: The maximum amount of CPU cores that can be requested by Pods. Equivalent to the `limit` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + min: + description: The minimal amount of CPU cores that Pods need to run. Equivalent to the `request` for Pod resource configuration. Cores are specified either as a decimal point number or as milli units. For example:`1.5` will be 1.5 cores, also written as `1500m`. + nullable: true + type: string + type: object + memory: + properties: + limit: + description: 'The maximum amount of memory that should be available to the Pod. Specified as a byte [Quantity](https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/), which means these suffixes are supported: E, P, T, G, M, k. You can also use the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. For example, the following represent roughly the same value: `128974848, 129e6, 129M, 128974848000m, 123Mi`' + nullable: true + type: string + runtimeLimits: + description: Additional options that can be specified. + type: object + type: object + storage: + properties: + logDirs: + default: + capacity: null + properties: + capacity: + description: "Quantity is a fixed-point representation of a number. It provides convenient marshaling/unmarshaling in JSON and YAML, in addition to String() and AsInt64() accessors.\n\nThe serialization format is:\n\n``` <quantity> ::= <signedNumber><suffix>\n\n\t(Note that <suffix> may be empty, from the \"\" case in <decimalSI>.)\n\n<digit> ::= 0 | 1 | ... | 9 <digits> ::= <digit> | <digit><digits> <number> ::= <digits> | <digits>.<digits> | <digits>. | .<digits> <sign> ::= \"+\" | \"-\" <signedNumber> ::= <number> | <sign><number> <suffix> ::= <binarySI> | <decimalExponent> | <decimalSI> <binarySI> ::= Ki | Mi | Gi | Ti | Pi | Ei\n\n\t(International System of units; See: http://physics.nist.gov/cuu/Units/binary.html)\n\n<decimalSI> ::= m | \"\" | k | M | G | T | P | E\n\n\t(Note that 1024 = 1Ki but 1000 = 1k; I didn't choose the capitalization.)\n\n<decimalExponent> ::= \"e\" <signedNumber> | \"E\" <signedNumber> ```\n\nNo matter which of the three exponent forms is used, no quantity may represent a number greater than 2^63-1 in magnitude, nor may it have more than 3 decimal places. Numbers larger or more precise will be capped or rounded up. (E.g.: 0.1m will be rounded up to 1m.) This may be extended in the future if we require larger or smaller quantities.\n\nWhen a Quantity is parsed from a string, it will remember the type of suffix it had, and will use the same type again when it is serialized.\n\nBefore serializing, Quantity will be put in \"canonical form\". This means that Exponent/suffix will be adjusted up or down (with a corresponding increase or decrease in Mantissa) such that:\n\n- No precision is lost - No fractional digits will be emitted - The exponent (or suffix) is as large as possible.\n\nThe sign will be omitted unless the number is negative.\n\nExamples:\n\n- 1.5 will be serialized as \"1500m\" - 1.5Gi will be serialized as \"1536Mi\"\n\nNote that the quantity will NEVER be internally represented by a floating point number. That is the whole point of this exercise.\n\nNon-canonical values will still parse as long as they are well formed, but will be re-emitted in their canonical form.
(So always use canonical form, or don't diff.)\n\nThis format is intended to make it difficult to use these numbers without writing some sort of special handling code in the hopes that that will cause implementors to also use a fixed point implementation." + nullable: true + type: string + selectors: + description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. + nullable: true + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + storageClass: + nullable: true + type: string + type: object + type: object + type: object + type: object + configOverrides: + additionalProperties: + additionalProperties: + type: string + type: object + default: {} + description: The `configOverrides` can be used to configure properties in product config files that are not exposed in the CRD. Read the [config overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#config-overrides) and consult the operator specific usage guide documentation for details on the available config files and settings for the specific product. + type: object + envOverrides: + additionalProperties: + type: string + default: {} + description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' + type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read up on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage.
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of these regexes will be removed + items: + type: string + type: array + type: object + podOverrides: + default: {} + description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. + type: object + x-kubernetes-preserve-unknown-fields: true + replicas: + format: uint16 + minimum: 0.0 + nullable: true + type: integer + type: object + type: object + required: + - roleGroups + type: object image: anyOf: - required: @@ -754,7 +1328,6 @@ spec: type: string type: object required: - - clusterConfig - image type: object status:
diff --git a/docs/modules/kafka/pages/index.adoc b/docs/modules/kafka/pages/index.adoc
index 6b15fa8c..43671170 100644
--- a/docs/modules/kafka/pages/index.adoc
+++ b/docs/modules/kafka/pages/index.adoc
@@ -33,7 +33,7 @@ image::kafka_overview.drawio.svg[A diagram depicting the Kubernetes resources cr
 For every xref:concepts:roles-and-role-groups.adoc#_role_groups[role group] in the `broker` role the operator creates a StatefulSet. Multiple Services are created - one at role level, one per role group as well as one for every individual Pod - to allow access to the entire Kafka cluster, parts of it or just individual brokers.
 
-For every StatefulSet (role group) a ConfigMap is deployed containing a `log4j.properties` file for xref:usage-guide/logging.adoc[logging] configuration and a `server.properties` file containing the whole Kafka configuration which is derived from the KafkaCluster resource.
+For every StatefulSet, a ConfigMap is deployed containing xref:usage-guide/logging.adoc[logging] properties and a Kafka configuration file which is derived from the KafkaCluster resource.
 
 The operator creates a xref:concepts:service_discovery.adoc[] for the whole KafkaCluster which references the Service for the whole cluster. Other operators use this ConfigMap to connect to a Kafka cluster simply by name and it can also be used by custom third party applications to find the connection endpoint.
diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc
new file mode 100644
index 00000000..ea5c4946
--- /dev/null
+++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc
@@ -0,0 +1,112 @@
+= KRaft mode (experimental)
+:description: Apache Kafka KRaft mode with the Stackable Operator for Apache Kafka
+
+WARNING: The Kafka KRaft mode is currently experimental, and subject to change.
+
+Apache Kafka's KRaft mode replaces Apache ZooKeeper with Kafka's own built-in consensus mechanism based on the Raft protocol.
+This simplifies Kafka's architecture, reducing operational complexity by consolidating cluster metadata management into Kafka itself.
+
+WARNING: The Stackable Operator for Apache Kafka currently does not support automatic cluster upgrades from Apache ZooKeeper to KRaft.
+
+== Overview
+
+* Introduced: Kafka 2.8.0 (early preview, not production-ready).
+* Matured: Kafka 3.3.x (production-ready, though ZooKeeper is still supported).
+* Default & Recommended: Kafka 3.5+ strongly recommends KRaft for new clusters.
+* Full Replacement: Kafka 4.0.0 (2025) removes ZooKeeper completely.
+* Migration: Tools exist to migrate from ZooKeeper to KRaft, but new deployments should start with KRaft.
+
+== Configuration
+
+The Stackable Kafka operator introduces a new xref:concepts:roles-and-role-groups.adoc[role] in the KafkaCluster CRD called KRaft `Controller`.
+Configuring the `Controller` role puts Kafka into KRaft mode; Apache ZooKeeper is then no longer required.
+
+[source,yaml]
+----
+apiVersion: kafka.stackable.tech/v1alpha1
+kind: KafkaCluster
+metadata:
+  name: kafka
+spec:
+  image:
+    productVersion: "3.9.1"
+  brokers:
+    roleGroups:
+      default:
+        replicas: 1
+  controllers:
+    roleGroups:
+      default:
+        replicas: 3
+----
+
+NOTE: Using `spec.controllers` is mutually exclusive with `spec.clusterConfig.zookeeperConfigMapName`.
+
+=== Recommendations
+
+A minimal KRaft setup consisting of at least 3 Controllers has the following https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/[resource requirements]:
+
+* `600m` CPU request
+* `3000m` CPU limit
+* `3000Mi` memory request and limit
+* `6Gi` persistent storage
+
+NOTE: The controller replicas should add up to an odd number for the Raft consensus.
+
+=== Resources
+
+If nothing else is configured, the operator uses the following resource defaults:
+
+[source,yaml]
+----
+controllers:
+  config:
+    resources:
+      memory:
+        limit: 1Gi
+      cpu:
+        min: 250m
+        max: 1000m
+      storage:
+        logDirs:
+          capacity: 2Gi
+----
+
+=== Overrides
+
+The configuration of overrides, JVM arguments etc. is similar to the broker role and documented on the xref:concepts:overrides.adoc[concepts page].
+
+== Internal operator details
+
+KRaft mode requires major configuration changes compared to ZooKeeper:
+
+* `cluster-id`: This is set to the `metadata.name` of the KafkaCluster resource during initial formatting.
+* `node.id`: This is a calculated integer: a hash of the `role` and `rolegroup` name plus the `replica` id (see the sketch at the end of this page).
+* `process.roles`: This is always either `broker` or `controller`. Mixed `broker,controller` servers are not supported.
+* The operator configures a static voter list containing the controller pods. Controllers are not dynamically managed.
+
+== Known Issues
+
+* Automatic migration from Apache ZooKeeper to KRaft is not supported.
+* Scaling controller replicas might lead to unstable clusters.
+* Kerberos is currently not supported for KRaft in all versions.
+
+== Troubleshooting
+
+=== Cluster does not start
+
+Check that at least a quorum (majority) of controllers is reachable.
+
+=== Frequent leader elections
+
+These are likely caused by controller resource starvation or unstable Kubernetes scheduling.
+
+=== Migration issues (ZooKeeper to KRaft)
+
+Ensure Kafka version 3.9.x or higher and follow the official migration documentation.
+The Stackable Kafka operator currently does not support the migration.
+
+=== Scaling issues
+
+https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum[Dynamic scaling] is only supported from Kafka version 3.9.0 onwards.
+If you are using an older version, automatic scaling may not work properly (e.g. adding or removing controller replicas).
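+
+== Appendix: node ID calculation sketch
+
+The following minimal, self-contained example illustrates the `node.id` derivation described above.
+It mirrors the FNV-1a hashing introduced in `rust/operator-binary/src/config/node_id_hasher.rs` in this change; the `node_id` helper and the example role/role group names are illustrative only.
+
+[source,rust]
+----
+/// FNV-1a hash, as implemented by the operator's node id hasher.
+fn fnv_hash32(input: &str) -> u32 {
+    const FNV_OFFSET: u32 = 0x811c9dc5;
+    const FNV_PRIME: u32 = 0x01000193;
+
+    let mut hash = FNV_OFFSET;
+    for byte in input.as_bytes() {
+        hash ^= u32::from(*byte);
+        hash = hash.wrapping_mul(FNV_PRIME);
+    }
+    hash
+}
+
+/// Illustrative helper: role group offset plus the pod's replica id.
+fn node_id(role: &str, rolegroup: &str, replica: u32) -> u32 {
+    // Keep 16 bits of the hash and scale by i16::MAX (0x7FFF) so that the
+    // offsets of different role groups are spaced apart and the resulting
+    // ids stay within Kafka's signed 32-bit integer range.
+    let offset = (fnv_hash32(&format!("{role}-{rolegroup}")) & 0x0000_FFFF) * 0x0000_7FFF;
+    offset + replica
+}
+
+fn main() {
+    // Each pod adds its StatefulSet ordinal (the REPLICA_ID extracted from the
+    // pod name at container startup) to the role group offset.
+    for replica in 0..3 {
+        println!("replica {replica}: node.id = {}", node_id("controller", "default", replica));
+    }
+}
+----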
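+
+For the static voter list, Kafka's `controller.quorum.voters` property uses the `id@host:port` notation, with one entry per controller pod.
+The node ids, service names and port below are placeholders, not values computed by the operator:
+
+[source,properties]
+----
+controller.quorum.voters=<node-id-0>@kafka-controller-default-0.<svc>.<ns>.svc.cluster.local:<port>,<node-id-1>@kafka-controller-default-1.<svc>.<ns>.svc.cluster.local:<port>,<node-id-2>@kafka-controller-default-2.<svc>.<ns>.svc.cluster.local:<port>
+----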
diff --git a/docs/modules/kafka/pages/usage-guide/overrides.adoc b/docs/modules/kafka/pages/usage-guide/overrides.adoc index 2abd5543..4c874743 100644 --- a/docs/modules/kafka/pages/usage-guide/overrides.adoc +++ b/docs/modules/kafka/pages/usage-guide/overrides.adoc @@ -8,7 +8,8 @@ IMPORTANT: Overriding operator-set properties (such as the ports) can interfere For a role or role group, at the same level of `config`, you can specify: `configOverrides` for the following files: -* `server.properties` +* `broker.properties` (brokers only) +* `controller.properties` (KRaft controllers only) * `security.properties` For example, if you want to set the `auto.create.topics.enable` to disable automatic topic creation, it can be configured in the KafkaCluster resource like so: @@ -19,7 +20,7 @@ brokers: roleGroups: default: configOverrides: - server.properties: + broker.properties: auto.create.topics.enable: "false" replicas: 1 ---- @@ -30,7 +31,7 @@ Just as for the `config`, it is possible to specify this at role level as well: ---- brokers: configOverrides: - server.properties: + broker.properties: auto.create.topics.enable: "false" roleGroups: default: diff --git a/docs/modules/kafka/pages/usage-guide/security.adoc b/docs/modules/kafka/pages/usage-guide/security.adoc index ffe7b151..8afb107b 100644 --- a/docs/modules/kafka/pages/usage-guide/security.adoc +++ b/docs/modules/kafka/pages/usage-guide/security.adoc @@ -217,7 +217,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.initial.capacity: "100" opa.authorizer.cache.maximum.size: "100" opa.authorizer.cache.expire.after.seconds: "10" diff --git a/docs/modules/kafka/partials/nav.adoc b/docs/modules/kafka/partials/nav.adoc index 7de36dbd..faaeab95 100644 --- a/docs/modules/kafka/partials/nav.adoc +++ b/docs/modules/kafka/partials/nav.adoc @@ -2,6 +2,7 @@ ** xref:kafka:getting_started/installation.adoc[] ** xref:kafka:getting_started/first_steps.adoc[] * xref:kafka:usage-guide/index.adoc[] +** xref:kafka:usage-guide/kraft-controller.adoc[] ** xref:kafka:usage-guide/listenerclass.adoc[] ** xref:kafka:usage-guide/storage-resources.adoc[] ** xref:kafka:usage-guide/security.adoc[] diff --git a/docs/modules/kafka/partials/supported-versions.adoc b/docs/modules/kafka/partials/supported-versions.adoc index 4a4b9ab1..e9622e97 100644 --- a/docs/modules/kafka/partials/supported-versions.adoc +++ b/docs/modules/kafka/partials/supported-versions.adoc @@ -2,6 +2,6 @@ // This is a separate file, since it is used by both the direct Kafka documentation, and the overarching // Stackable Platform documentation. 
+* 4.1.0 (experimental) * 3.9.1 -* 3.9.0 (deprecated) * 3.7.2 (LTS) diff --git a/examples/logging/simple-kafka-cluster-opa-log4j.yaml b/examples/logging/simple-kafka-cluster-opa-log4j.yaml index dcda3e95..59b6df13 100644 --- a/examples/logging/simple-kafka-cluster-opa-log4j.yaml +++ b/examples/logging/simple-kafka-cluster-opa-log4j.yaml @@ -62,7 +62,7 @@ spec: logging: enableVectorAgent: true configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.expire.after.seconds: "10" roleGroups: default: diff --git a/examples/opa/simple-kafka-cluster-opa-allow-all.yaml b/examples/opa/simple-kafka-cluster-opa-allow-all.yaml index 0890ba11..bd5768dc 100644 --- a/examples/opa/simple-kafka-cluster-opa-allow-all.yaml +++ b/examples/opa/simple-kafka-cluster-opa-allow-all.yaml @@ -59,7 +59,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: configOverrides: - server.properties: + broker.properties: opa.authorizer.cache.expire.after.seconds: "0" roleGroups: default: diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs new file mode 100644 index 00000000..8887c904 --- /dev/null +++ b/rust/operator-binary/src/config/command.rs @@ -0,0 +1,238 @@ +use indoc::formatdoc; +use stackable_operator::{ + product_logging::framework::{ + create_vector_shutdown_file_command, remove_vector_shutdown_file_command, + }, + utils::COMMON_BASH_TRAP_FUNCTIONS, +}; + +use crate::{ + crd::{ + KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, + listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, + role::{ + KAFKA_ADVERTISED_LISTENERS, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, + KAFKA_CONTROLLER_QUORUM_VOTERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, + KAFKA_NODE_ID, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BROKER_PROPERTIES_FILE, + controller::CONTROLLER_PROPERTIES_FILE, + }, + security::KafkaTlsSecurity, + v1alpha1, + }, + product_logging::STACKABLE_LOG_DIR, +}; + +/// Returns the commands to start the main Kafka container +pub fn broker_kafka_container_commands( + kafka: &v1alpha1::KafkaCluster, + cluster_id: &str, + controller_descriptors: Vec<KafkaPodDescriptor>, + kafka_listeners: &KafkaListenerConfig, + opa_connect_string: Option<&str>, + kafka_security: &KafkaTlsSecurity, + product_version: &str, +) -> String { + formatdoc! {" + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + {set_realm_env} + + {broker_start_command} + + wait_for_termination $!
+ {create_vector_shutdown_file_command} + ", + remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR), + set_realm_env = match kafka_security.has_kerberos_enabled() { + true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {STACKABLE_KERBEROS_KRB5_PATH})"), + false => "".to_string(), + }, + broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, kafka_listeners, opa_connect_string, kafka_security, product_version), + } +} + +fn broker_start_command( + kafka: &v1alpha1::KafkaCluster, + cluster_id: &str, + controller_descriptors: Vec<KafkaPodDescriptor>, + kafka_listeners: &KafkaListenerConfig, + opa_connect_string: Option<&str>, + kafka_security: &KafkaTlsSecurity, + product_version: &str, +) -> String { + let opa_config = match opa_connect_string { + None => "".to_string(), + Some(opa_connect_string) => { + format!(" --override \"opa.authorizer.url={opa_connect_string}\"") + } + }; + + let jaas_config = match kafka_security.has_kerberos_enabled() { + true => { + formatdoc! {" + --override \"{client_jaas_config}=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" \ + --override \"{bootstrap_jaas_config}=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\" + ", + client_jaas_config = KafkaListenerName::Client.listener_gssapi_sasl_jaas_config(), + bootstrap_jaas_config = KafkaListenerName::Bootstrap.listener_gssapi_sasl_jaas_config(), + service_name = KafkaRole::Broker.kerberos_service_name(), + broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR), + bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR), + } + } + false => "".to_string(), + }; + + let client_port = kafka_security.client_port(); + + // TODO: The properties file from the configmap is copied to the /tmp folder and appended with dynamic properties + // This should be improved: + // - mount emptyDir as readWriteConfig + // - use config-utils for proper replacements? + // - should we print the adapted properties file at startup? + if kafka.is_controller_configured() { + formatdoc!
{" + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + ${KAFKA_NODE_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={controller_quorum_voters}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} + bin/kafka-server-start.sh /tmp/{properties_file} {opa_config}{jaas_config} & + ", + config_dir = STACKABLE_CONFIG_DIR, + properties_file = BROKER_PROPERTIES_FILE, + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + listeners = kafka_listeners.listeners(), + advertised_listeners = kafka_listeners.advertised_listeners(), + listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), + initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), + } + } else { + formatdoc! {" + bin/kafka-server-start.sh {config_dir}/{properties_file} \ + --override \"zookeeper.connect=$ZOOKEEPER\" \ + --override \"{KAFKA_LISTENERS}={listeners}\" \ + --override \"{KAFKA_ADVERTISED_LISTENERS}={advertised_listeners}\" \ + --override \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" \ + {opa_config} \ + {jaas_config} \ + &", + config_dir = STACKABLE_CONFIG_DIR, + properties_file = BROKER_PROPERTIES_FILE, + listeners = kafka_listeners.listeners(), + advertised_listeners = kafka_listeners.advertised_listeners(), + listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), + } + } +} + +pub fn controller_kafka_container_command( + cluster_id: &str, + controller_descriptors: Vec, + kafka_listeners: &KafkaListenerConfig, + kafka_security: &KafkaTlsSecurity, + product_version: &str, +) -> String { + let client_port = kafka_security.client_port(); + + // TODO: The properties file from the configmap is copied to the /tmp folder and appended with dynamic properties + // This should be improved: + // - mount emptyDir as readWriteConfig + // - use config-utils for proper replacements? + // - should we print the adapted properties file at startup? + formatdoc! 
{" + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + + export REPLICA_ID=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + cp {config_dir}/{properties_file} /tmp/{properties_file} + + echo \"{KAFKA_NODE_ID}=$((REPLICA_ID + ${KAFKA_NODE_ID_OFFSET}))\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS}={bootstrap_servers}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENERS}={listeners}\" >> /tmp/{properties_file} + echo \"{KAFKA_LISTENER_SECURITY_PROTOCOL_MAP}={listener_security_protocol_map}\" >> /tmp/{properties_file} + echo \"{KAFKA_CONTROLLER_QUORUM_VOTERS}={controller_quorum_voters}\" >> /tmp/{properties_file} + + bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} + bin/kafka-server-start.sh /tmp/{properties_file} & + + wait_for_termination $! + {create_vector_shutdown_file_command} + ", + remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + config_dir = STACKABLE_CONFIG_DIR, + properties_file = CONTROLLER_PROPERTIES_FILE, + bootstrap_servers = to_bootstrap_servers(&controller_descriptors, client_port), + listeners = to_listeners(client_port), + listener_security_protocol_map = to_listener_security_protocol_map(kafka_listeners), + initial_controller_command = initial_controllers_command(&controller_descriptors, product_version, client_port), + controller_quorum_voters = to_quorum_voters(&controller_descriptors, client_port), + create_vector_shutdown_file_command = create_vector_shutdown_file_command(STACKABLE_LOG_DIR) + } +} + +fn to_listeners(port: u16) -> String { + // The environment variables are set in the statefulset of the controller + format!( + "{listener_name}://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}", + listener_name = KafkaListenerName::Controller + ) +} + +fn to_listener_security_protocol_map(kafka_listeners: &KafkaListenerConfig) -> String { + kafka_listeners + .listener_security_protocol_map_for_listener(&KafkaListenerName::Controller) + .unwrap_or("".to_string()) +} + +fn to_initial_controllers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { + controller_descriptors + .iter() + .map(|desc| desc.as_voter(port)) + .collect::>() + .join(",") +} + +// TODO: This can be removed once 3.7.2 is removed. Used in command.rs. 
+fn to_quorum_voters(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { + controller_descriptors + .iter() + .map(|desc| desc.as_quorum_voter(port)) + .collect::<Vec<_>>() + .join(",") +} + +fn to_bootstrap_servers(controller_descriptors: &[KafkaPodDescriptor], port: u16) -> String { + controller_descriptors + .iter() + .map(|desc| format!("{fqdn}:{port}", fqdn = desc.fqdn())) + .collect::<Vec<_>>() + .join(",") +} + +fn initial_controllers_command( + controller_descriptors: &[KafkaPodDescriptor], + product_version: &str, + client_port: u16, +) -> String { + match product_version.starts_with("3.7") { + true => "".to_string(), + false => format!( + "--initial-controllers {initial_controllers}", + initial_controllers = to_initial_controllers(controller_descriptors, client_port), + ), + } +} diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 355a42cb..f3ecb9e8 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -5,8 +5,7 @@ use stackable_operator::{ }; use crate::crd::{ - JVM_SECURITY_PROPERTIES_FILE, KafkaConfig, KafkaConfigFragment, METRICS_PORT, - STACKABLE_CONFIG_DIR, + JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, STACKABLE_CONFIG_DIR, role::AnyConfig, }; const JAVA_HEAP_FACTOR: f32 = 0.8; @@ -26,14 +25,14 @@ pub enum Error { } /// All JVM arguments. -fn construct_jvm_args( - merged_config: &KafkaConfig, - role: &Role, +fn construct_jvm_args( + merged_config: &AnyConfig, + role: &Role, role_group: &str, ) -> Result<Vec<String>, Error> { let heap_size = MemoryQuantity::try_from( merged_config - .resources + .resources() .memory .limit .as_ref() @@ -68,9 +67,9 @@ fn construct_jvm_args( /// Arguments that go into `EXTRA_ARGS`, so *not* the heap settings (which you can get using /// [`construct_heap_jvm_args`]). -pub fn construct_non_heap_jvm_args( - merged_config: &KafkaConfig, - role: &Role, +pub fn construct_non_heap_jvm_args( + merged_config: &AnyConfig, + role: &Role, role_group: &str, ) -> Result<String, Error> { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -81,9 +80,9 @@ /// Arguments that go into `KAFKA_HEAP_OPTS`. /// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`].
-pub fn construct_heap_jvm_args( - merged_config: &KafkaConfig, - role: &Role, +pub fn construct_heap_jvm_args( + merged_config: &AnyConfig, + role: &Role, role_group: &str, ) -> Result<String, Error> { let mut jvm_args = construct_jvm_args(merged_config, role, role_group)?; @@ -101,7 +100,10 @@ fn is_heap_jvm_argument(jvm_argument: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::crd::{KafkaRole, v1alpha1}; + use crate::crd::{ + role::{KafkaRole, broker::BrokerConfigFragment}, + v1alpha1, + }; #[test] fn test_construct_jvm_arguments_defaults() { @@ -130,7 +132,7 @@ mod tests { "-Djava.security.properties=/stackable/config/security.properties \ -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9606:/stackable/jmx/broker.yaml" ); - assert_eq!(heap_jvm_args, "-Xmx819m -Xms819m"); + assert_eq!(heap_jvm_args, "-Xmx1638m -Xms1638m"); } #[test] @@ -186,16 +188,18 @@ fn construct_boilerplate( kafka_cluster: &str, ) -> ( - KafkaConfig, - Role, + AnyConfig, + Role, String, ) { let kafka: v1alpha1::KafkaCluster = serde_yaml::from_str(kafka_cluster).expect("illegal test input"); let kafka_role = KafkaRole::Broker; - let rolegroup_ref = kafka.broker_rolegroup_ref("default"); - let merged_config = kafka.merged_config(&kafka_role, &rolegroup_ref).unwrap(); + let rolegroup_ref = kafka.rolegroup_ref(&kafka_role, "default"); + let merged_config = kafka_role + .merged_config(&kafka, &rolegroup_ref.role_group) + .unwrap(); let role = kafka.spec.brokers.unwrap(); (merged_config, role, "default".to_owned()) diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs index 271c6d99..ae92b3c2 100644 --- a/rust/operator-binary/src/config/mod.rs +++ b/rust/operator-binary/src/config/mod.rs @@ -1 +1,3 @@ +pub mod command; pub mod jvm; +pub mod node_id_hasher; diff --git a/rust/operator-binary/src/config/node_id_hasher.rs b/rust/operator-binary/src/config/node_id_hasher.rs new file mode 100644 index 00000000..eebee090 --- /dev/null +++ b/rust/operator-binary/src/config/node_id_hasher.rs @@ -0,0 +1,27 @@ +use stackable_operator::role_utils::RoleGroupRef; + +use crate::crd::v1alpha1::KafkaCluster; + +pub fn node_id_hash32_offset(rolegroup_ref: &RoleGroupRef<KafkaCluster>) -> u32 { + let hash = fnv_hash32(&format!( + "{role}-{rolegroup}", + role = rolegroup_ref.role, + rolegroup = rolegroup_ref.role_group + )); + let range = hash & 0x0000FFFF; + // Kafka uses signed integer + range * 0x00007FFF +} + +/// Simple FNV-1a hash impl +fn fnv_hash32(input: &str) -> u32 { + const FNV_OFFSET: u32 = 0x811c9dc5; + const FNV_PRIME: u32 = 0x01000193; + + let mut hash = FNV_OFFSET; + for byte in input.as_bytes() { + hash ^= u32::from(*byte); + hash = hash.wrapping_mul(FNV_PRIME); + } + hash +} diff --git a/rust/operator-binary/src/crd/affinity.rs b/rust/operator-binary/src/crd/affinity.rs index 392c0085..01c07d86 100644 --- a/rust/operator-binary/src/crd/affinity.rs +++ b/rust/operator-binary/src/crd/affinity.rs @@ -3,14 +3,14 @@ use stackable_operator::{ k8s_openapi::api::core::v1::PodAntiAffinity, }; -use crate::crd::{APP_NAME, KafkaRole}; +use crate::crd::APP_NAME; -pub fn get_affinity(cluster_name: &str, role: &KafkaRole) -> StackableAffinityFragment { +pub fn get_affinity(cluster_name: &str, role: &str) -> StackableAffinityFragment { StackableAffinityFragment { pod_affinity: None, pod_anti_affinity: Some(PodAntiAffinity { preferred_during_scheduling_ignored_during_execution: Some(vec![ - affinity_between_role_pods(APP_NAME, cluster_name, &role.to_string(), 70), +
affinity_between_role_pods(APP_NAME, cluster_name, role, 70), ]), required_during_scheduling_ignored_during_execution: None, }), @@ -32,8 +32,7 @@ mod tests { }, }; - use super::*; - use crate::crd::v1alpha1; + use crate::crd::{KafkaRole, v1alpha1}; #[rstest] #[case(KafkaRole::Broker)] @@ -56,9 +55,7 @@ mod tests { let kafka: v1alpha1::KafkaCluster = serde_yaml::from_str(input).expect("illegal test input"); - let merged_config = kafka - .merged_config(&role, &role.rolegroup_ref(&kafka, "default")) - .unwrap(); + let merged_config = role.merged_config(&kafka, "default").unwrap(); assert_eq!( merged_config.affinity, diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 5de0bde1..96b995a9 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -42,6 +42,68 @@ pub enum KafkaListenerName { Internal, #[strum(serialize = "BOOTSTRAP")] Bootstrap, + #[strum(serialize = "CONTROLLER")] + Controller, + #[strum(serialize = "CONTROLLER_AUTH")] + ControllerAuth, +} + +impl KafkaListenerName { + pub fn listener_ssl_keystore_location(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.location", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_keystore_password(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.password", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_keystore_type(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.keystore.type", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_location(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.truststore.location", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_password(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.truststore.password", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_truststore_type(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.truststore.type", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_ssl_client_auth(&self) -> String { + format!( + "listener.name.{listener_name}.ssl.client.auth", + listener_name = self.to_string().to_lowercase() + ) + } + + pub fn listener_gssapi_sasl_jaas_config(&self) -> String { + format!( + "listener.name.{listener_name}.gssapi.sasl.jaas.config", + listener_name = self.to_string().to_lowercase() + ) + } } #[derive(Debug)] @@ -52,7 +114,7 @@ pub struct KafkaListenerConfig { } impl KafkaListenerConfig { - /// Returns the `listeners` for the Kafka `server.properties` config. + /// Returns the `listeners` for the Kafka `broker.properties` config. pub fn listeners(&self) -> String { self.listeners .iter() @@ -61,7 +123,7 @@ impl KafkaListenerConfig { .join(",") } - /// Returns the `advertised.listeners` for the Kafka `server.properties` config. + /// Returns the `advertised.listeners` for the Kafka `broker.properties` config. /// May contain ENV variables and therefore should be used as cli argument /// like --override \"advertised.listeners=xxx\". pub fn advertised_listeners(&self) -> String { @@ -72,7 +134,7 @@ impl KafkaListenerConfig { .join(",") } - /// Returns the `listener.security.protocol.map` for the Kafka `server.properties` config. + /// Returns the `listener.security.protocol.map` for the Kafka `broker.properties` config. 
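All of the `KafkaListenerName` helpers above emit keys of the same `listener.name.<listener>.<suffix>` shape, with the listener name lowercased; a minimal sketch of that shape (the listener name and suffix are examples, not an exhaustive list):

```rust
fn listener_property(listener_name: &str, suffix: &str) -> String {
    // Kafka expects the listener name segment lowercased, e.g.
    // CONTROLLER_AUTH -> listener.name.controller_auth.*
    format!(
        "listener.name.{listener_name}.{suffix}",
        listener_name = listener_name.to_lowercase()
    )
}

fn main() {
    assert_eq!(
        listener_property("CONTROLLER_AUTH", "ssl.keystore.location"),
        "listener.name.controller_auth.ssl.keystore.location"
    );
}
```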
pub fn listener_security_protocol_map(&self) -> String { self.listener_security_protocol_map .iter() @@ -80,6 +142,16 @@ impl KafkaListenerConfig { .collect::>() .join(",") } + + /// Returns the `listener.security.protocol.map` for the Kafka `broker.properties` config. + pub fn listener_security_protocol_map_for_listener( + &self, + listener_name: &KafkaListenerName, + ) -> Option { + self.listener_security_protocol_map + .get(listener_name) + .map(|protocol| format!("{listener_name}:{protocol}")) + } } #[derive(Debug)] @@ -104,7 +176,8 @@ pub fn get_kafka_listener_config( let pod_fqdn = pod_fqdn(kafka, object_name, cluster_info)?; let mut listeners = vec![]; let mut advertised_listeners = vec![]; - let mut listener_security_protocol_map = BTreeMap::new(); + let mut listener_security_protocol_map: BTreeMap = + BTreeMap::new(); // CLIENT if kafka_security.tls_client_authentication_class().is_some() { @@ -124,6 +197,10 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::ClientAuth, KafkaListenerProtocol::Ssl); + listener_security_protocol_map.insert( + KafkaListenerName::ControllerAuth, + KafkaListenerProtocol::Ssl, + ); } else if kafka_security.has_kerberos_enabled() { // 2) Kerberos and TLS authentication classes are mutually exclusive listeners.push(KafkaListener { @@ -141,6 +218,10 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::Client, KafkaListenerProtocol::SaslSsl); + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::SaslSsl, + ); } else if kafka_security.tls_server_secret_class().is_some() { // 3) If no client authentication but tls is required we expose CLIENT with SSL listeners.push(KafkaListener { @@ -177,7 +258,7 @@ pub fn get_kafka_listener_config( .insert(KafkaListenerName::Client, KafkaListenerProtocol::Plaintext); } - // INTERNAL + // INTERNAL / CONTROLLER if kafka_security.has_kerberos_enabled() || kafka_security.tls_internal_secret_class().is_some() { // 5) & 6) Kerberos and TLS authentication classes are mutually exclusive but both require internal tls to be used @@ -193,6 +274,8 @@ pub fn get_kafka_listener_config( }); listener_security_protocol_map .insert(KafkaListenerName::Internal, KafkaListenerProtocol::Ssl); + listener_security_protocol_map + .insert(KafkaListenerName::Controller, KafkaListenerProtocol::Ssl); } else { // 7) If no internal tls is required we expose INTERNAL as PLAINTEXT listeners.push(KafkaListener { @@ -209,6 +292,10 @@ pub fn get_kafka_listener_config( KafkaListenerName::Internal, KafkaListenerProtocol::Plaintext, ); + listener_security_protocol_map.insert( + KafkaListenerName::Controller, + KafkaListenerProtocol::Plaintext, + ); } // BOOTSTRAP @@ -349,11 +436,15 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol},{controller_auth_name}:{controller_auth_protocol}", name = KafkaListenerName::ClientAuth, protocol = KafkaListenerProtocol::Ssl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, + controller_auth_name = KafkaListenerName::ControllerAuth, + controller_auth_protocol = KafkaListenerProtocol::Ssl, ) ); @@ -397,11 +488,13 @@ mod tests { assert_eq!( 
config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::Ssl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, ) ); @@ -446,11 +539,13 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::Plaintext, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Plaintext, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Plaintext, ) ); } @@ -537,13 +632,15 @@ mod tests { assert_eq!( config.listener_security_protocol_map(), format!( - "{name}:{protocol},{internal_name}:{internal_protocol},{bootstrap_name}:{bootstrap_protocol}", + "{name}:{protocol},{internal_name}:{internal_protocol},{bootstrap_name}:{bootstrap_protocol},{controller_name}:{controller_protocol}", name = KafkaListenerName::Client, protocol = KafkaListenerProtocol::SaslSsl, internal_name = KafkaListenerName::Internal, internal_protocol = KafkaListenerProtocol::Ssl, bootstrap_name = KafkaListenerName::Bootstrap, bootstrap_protocol = KafkaListenerProtocol::SaslSsl, + controller_name = KafkaListenerName::Controller, + controller_protocol = KafkaListenerProtocol::Ssl, ) ); } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 56de9409..fc801bc6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -2,45 +2,36 @@ pub mod affinity; pub mod authentication; pub mod authorization; pub mod listener; +pub mod role; pub mod security; pub mod tls; -use std::{collections::BTreeMap, str::FromStr}; +use std::collections::{BTreeMap, HashMap}; -use affinity::get_affinity; use authentication::KafkaAuthentication; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; +use snafu::{OptionExt, Snafu}; use stackable_operator::{ commons::{ - affinity::StackableAffinity, - cluster_operation::ClusterOperation, + cluster_operation::ClusterOperation, networking::DomainName, product_image_selection::ProductImage, - resources::{ - CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, - PvcConfig, PvcConfigFragment, Resources, ResourcesFragment, - }, }, - config::{ - fragment::{self, Fragment, ValidationError}, - merge::Merge, - }, - k8s_openapi::{ - api::core::v1::PersistentVolumeClaim, apimachinery::pkg::api::resource::Quantity, - }, - kube::{CustomResource, ResourceExt, runtime::reflector::ObjectRef}, - product_config_utils::Configuration, - product_logging::{self, spec::Logging}, - role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, + kube::{CustomResource, runtime::reflector::ObjectRef}, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, schemars::{self, JsonSchema}, - shared::time::Duration, status::condition::{ClusterCondition, HasStatusCondition}, utils::cluster_info::KubernetesClusterInfo, versioned::versioned, }; -use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; -use 
crate::crd::{authorization::KafkaAuthorization, tls::KafkaTls}; +use crate::{ + config::node_id_hasher::node_id_hash32_offset, + crd::{ + authorization::KafkaAuthorization, + role::{KafkaRole, broker::BrokerConfigFragment, controller::ControllerConfigFragment}, + tls::KafkaTls, + }, +}; pub const DOCKER_IMAGE_BASE_NAME: &str = "kafka"; pub const APP_NAME: &str = "kafka"; @@ -49,7 +40,6 @@ pub const OPERATOR_NAME: &str = "kafka.stackable.tech"; pub const METRICS_PORT_NAME: &str = "metrics"; pub const METRICS_PORT: u16 = 9606; // config files -pub const SERVER_PROPERTIES_FILE: &str = "server.properties"; pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties"; // env vars pub const KAFKA_HEAP_OPTS: &str = "KAFKA_HEAP_OPTS"; @@ -62,46 +52,37 @@ pub const STACKABLE_LISTENER_BROKER_DIR: &str = "/stackable/listener-broker"; pub const STACKABLE_LISTENER_BOOTSTRAP_DIR: &str = "/stackable/listener-bootstrap"; pub const STACKABLE_DATA_DIR: &str = "/stackable/data"; pub const STACKABLE_CONFIG_DIR: &str = "/stackable/config"; -pub const STACKABLE_LOG_CONFIG_DIR: &str = "/stackable/log_config"; -pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; // kerberos pub const STACKABLE_KERBEROS_DIR: &str = "/stackable/kerberos"; pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; -const DEFAULT_BROKER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); - #[derive(Snafu, Debug)] pub enum Error { - #[snafu(display("object has no namespace associated"))] - NoNamespace, - - #[snafu(display("failed to validate config of rolegroup {rolegroup}"))] - RoleGroupValidation { - rolegroup: RoleGroupRef<v1alpha1::KafkaCluster>, - source: ValidationError, - }, - - #[snafu(display("the Kafka role [{role}] is missing from spec"))] - MissingKafkaRole { role: String }, + #[snafu(display("the Kafka role [{role}] is missing from spec"))] + MissingRole { role: String }, - #[snafu(display("the role {role} is not defined"))] - CannotRetrieveKafkaRole { role: String }, + #[snafu(display("object has no namespace associated"))] + NoNamespace, - #[snafu(display("the Kafka node role group [{role_group}] is missing from spec"))] - MissingKafkaRoleGroup { role_group: String }, + #[snafu(display( + "Kafka version 4 and higher requires a KRaft controller (configured via `spec.controller`)" + ))] + Kafka4RequiresKraft, - #[snafu(display("the role group {role_group} is not defined"))] - CannotRetrieveKafkaRoleGroup { role_group: String }, + #[snafu(display( + "KRaft controller (`spec.controller`) and ZooKeeper (`spec.clusterConfig.zookeeperConfigMapName`) are configured. Please choose only one" + ))] + KraftAndZookeeperConfigured, - #[snafu(display("unknown role {role}. Should be one of {roles:?}"))] - UnknownKafkaRole { - source: strum::ParseError, - role: String, - roles: Vec<String>, + #[snafu(display( + "could not calculate 'node.id' hash offset for role '{role}' and rolegroup '{rolegroup}' because it collides with role '{colliding_role}' and rolegroup '{colliding_rolegroup}'. Please try to rename one of the rolegroups." + ))] + KafkaNodeIdHashCollision { + role: KafkaRole, + rolegroup: String, + colliding_role: KafkaRole, + colliding_rolegroup: String, }, - - #[snafu(display("fragment validation failure"))] - FragmentValidationFailure { source: ValidationError }, } #[versioned( @@ -132,11 +113,16 @@ pub mod versioned { pub image: ProductImage, // no doc - docs in Role struct. - pub brokers: Option<Role<KafkaConfigFragment, GenericRoleConfig, JavaCommonConfig>>, + pub brokers: Option<Role<BrokerConfigFragment, GenericRoleConfig, JavaCommonConfig>>, + + // no doc - docs in Role struct.
+ pub controllers: + Option<Role<ControllerConfigFragment, GenericRoleConfig, JavaCommonConfig>>, /// Kafka settings that affect all roles and role groups. /// /// The settings in the `clusterConfig` are cluster wide settings that do not need to be configurable at role or role group level. + #[serde(default)] pub cluster_config: v1alpha1::KafkaClusterConfig, // no doc - docs in ClusterOperation struct. @@ -169,11 +155,24 @@ #[serde(skip_serializing_if = "Option::is_none")] pub vector_aggregator_config_map_name: Option<String>, - /// Kafka requires a ZooKeeper cluster connection to run. /// Provide the name of the ZooKeeper [discovery ConfigMap](DOCS_BASE_URL_PLACEHOLDER/concepts/service_discovery) /// here. When using the [Stackable operator for Apache ZooKeeper](DOCS_BASE_URL_PLACEHOLDER/zookeeper/) /// to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. - pub zookeeper_config_map_name: String, + /// This can only be used up to Kafka version 3.9.x; ZooKeeper support was dropped in Kafka 4.0.0. + /// Please use the 'controller' role instead. + pub zookeeper_config_map_name: Option<String>, + } +} + +impl Default for v1alpha1::KafkaClusterConfig { + fn default() -> Self { + Self { + authentication: vec![], + authorization: KafkaAuthorization::default(), + tls: tls::default_kafka_tls(), + vector_aggregator_config_map_name: None, + zookeeper_config_map_name: None, + } } } @@ -187,109 +186,152 @@ impl HasStatusCondition for v1alpha1::KafkaCluster { } impl v1alpha1::KafkaCluster { + /// Supporting KRaft alongside ZooKeeper requires a couple of CRD checks: + /// - Kafka 4 and higher requires the controller role to be configured (ZooKeeper is no longer supported) + /// - The controller role and a ZooKeeper ConfigMap reference are mutually exclusive + pub fn check_kraft_vs_zookeeper(&self, product_version: &str) -> Result<(), Error> { + if product_version.starts_with("4.") && self.spec.controllers.is_none() { + return Err(Error::Kafka4RequiresKraft); + } + + if self.spec.controllers.is_some() + && self.spec.cluster_config.zookeeper_config_map_name.is_some() + { + return Err(Error::KraftAndZookeeperConfigured); + } + + Ok(()) + } + + pub fn is_controller_configured(&self) -> bool { + self.spec.controllers.is_some() + } + + /// The cluster-id for Kafka + pub fn cluster_id(&self) -> Option<&str> { + self.metadata.name.as_deref() + } + /// The name of the load-balanced Kubernetes Service providing the bootstrap address. Kafka clients will use this /// to get a list of broker addresses and will use those to transmit data to the correct broker.
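Distilled into a standalone sketch (plain booleans instead of the CRD types, which are only assumed here), the two rules above accept or reject the following combinations:

```rust
fn check_kraft_vs_zookeeper(
    product_version: &str,
    has_controllers: bool,
    has_zookeeper_config_map: bool,
) -> Result<(), &'static str> {
    if product_version.starts_with("4.") && !has_controllers {
        return Err("Kafka 4 and higher requires a KRaft controller");
    }
    if has_controllers && has_zookeeper_config_map {
        return Err("KRaft controller and ZooKeeper are mutually exclusive");
    }
    Ok(())
}

fn main() {
    assert!(check_kraft_vs_zookeeper("3.9.1", false, true).is_ok()); // classic ZooKeeper setup
    assert!(check_kraft_vs_zookeeper("3.9.1", true, false).is_ok()); // KRaft on 3.x
    assert!(check_kraft_vs_zookeeper("4.0.0", true, false).is_ok()); // KRaft on 4.x
    assert!(check_kraft_vs_zookeeper("4.0.0", false, true).is_err()); // 4.x without controllers
    assert!(check_kraft_vs_zookeeper("3.9.1", true, true).is_err()); // both configured
}
```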
pub fn bootstrap_service_name(&self, rolegroup: &RoleGroupRef<v1alpha1::KafkaCluster>) -> String { format!("{}-bootstrap", rolegroup.object_name()) } - /// Metadata about a broker rolegroup - pub fn broker_rolegroup_ref(&self, group_name: impl Into<String>) -> RoleGroupRef<v1alpha1::KafkaCluster> { + /// Metadata about a rolegroup + pub fn rolegroup_ref( + &self, + role: &KafkaRole, + group_name: impl Into<String>, + ) -> RoleGroupRef<v1alpha1::KafkaCluster> { RoleGroupRef { cluster: ObjectRef::from_obj(self), - role: KafkaRole::Broker.to_string(), + role: role.to_string(), role_group: group_name.into(), } } - pub fn role( - &self, - role_variant: &KafkaRole, - ) -> Result<&Role<KafkaConfigFragment, GenericRoleConfig, JavaCommonConfig>, Error> { - match role_variant { - KafkaRole::Broker => self.spec.brokers.as_ref(), + pub fn role_config(&self, role: &KafkaRole) -> Option<&GenericRoleConfig> { + match role { + KafkaRole::Broker => self.spec.brokers.as_ref().map(|b| &b.role_config), + KafkaRole::Controller => self.spec.controllers.as_ref().map(|b| &b.role_config), } - .with_context(|| CannotRetrieveKafkaRoleSnafu { - role: role_variant.to_string(), - }) } - pub fn rolegroup( + pub fn broker_role( &self, - rolegroup_ref: &RoleGroupRef<v1alpha1::KafkaCluster>, - ) -> Result<&RoleGroup<KafkaConfigFragment, JavaCommonConfig>, Error> { - let role_variant = - KafkaRole::from_str(&rolegroup_ref.role).with_context(|_| UnknownKafkaRoleSnafu { - role: rolegroup_ref.role.to_owned(), - roles: KafkaRole::roles(), - })?; - - let role = self.role(&role_variant)?; - role.role_groups - .get(&rolegroup_ref.role_group) - .with_context(|| CannotRetrieveKafkaRoleGroupSnafu { - role_group: rolegroup_ref.role_group.to_owned(), - }) + ) -> Result<&Role<BrokerConfigFragment, GenericRoleConfig, JavaCommonConfig>, Error> { + self.spec.brokers.as_ref().context(MissingRoleSnafu { + role: KafkaRole::Broker.to_string(), + }) } - pub fn role_config(&self, role: &KafkaRole) -> Option<&GenericRoleConfig> { - match role { - KafkaRole::Broker => self.spec.brokers.as_ref().map(|b| &b.role_config), - } + pub fn controller_role( + &self, + ) -> Result<&Role<ControllerConfigFragment, GenericRoleConfig, JavaCommonConfig>, Error> { + self.spec.controllers.as_ref().context(MissingRoleSnafu { + role: KafkaRole::Controller.to_string(), + }) } - /// List all pods expected to form the cluster + /// List all pod descriptors of a provided role expected to form the cluster. /// /// We try to predict the pods here rather than looking at the current cluster state in order to /// avoid instance churn.
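The `pod_descriptors` method below assigns every replica a stable `node.id`: the FNV-1a hash from `node_id_hasher.rs` above gives each role/rolegroup pair an offset, and replica `i` gets `offset + i`, so rolegroups occupy disjoint id ranges. A worked, self-contained sketch of the arithmetic (the role and rolegroup names are examples):

```rust
const FNV_OFFSET: u32 = 0x811c9dc5;
const FNV_PRIME: u32 = 0x01000193;

// Same FNV-1a as in node_id_hasher.rs.
fn fnv_hash32(input: &str) -> u32 {
    let mut hash = FNV_OFFSET;
    for byte in input.as_bytes() {
        hash ^= u32::from(*byte);
        hash = hash.wrapping_mul(FNV_PRIME);
    }
    hash
}

fn node_id_hash32_offset(role: &str, rolegroup: &str) -> u32 {
    // Keep 16 bits of the hash (0..=0xFFFF) and scale by 0x7FFF, so even the
    // largest offset plus 0x7FFF replicas still fits into a signed 32-bit id.
    (fnv_hash32(&format!("{role}-{rolegroup}")) & 0x0000_FFFF) * 0x0000_7FFF
}

fn main() {
    for (role, rolegroup) in [("broker", "default"), ("controller", "default")] {
        let offset = node_id_hash32_offset(role, rolegroup);
        assert!(u64::from(offset) + 0x7FFF <= i32::MAX as u64);
        // Replica i gets node.id = offset + i.
        println!("{role}/{rolegroup}: node.id range starts at {offset}");
    }
}
```

Two rolegroups can hash to the same 16-bit range, which is why `pod_descriptors` tracks seen offsets and fails with `KafkaNodeIdHashCollision` instead of silently handing out overlapping ids.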
- pub fn pods(&self) -> Result<impl Iterator<Item = KafkaPodRef> + '_, Error> { - let ns = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; - Ok(self - .spec - .brokers - .iter() - .flat_map(|role| &role.role_groups) - // Order rolegroups consistently, to avoid spurious downstream rewrites - .collect::<BTreeMap<_, _>>() - .into_iter() - .flat_map(move |(rolegroup_name, rolegroup)| { - let rolegroup_ref = self.broker_rolegroup_ref(rolegroup_name); - let ns = ns.clone(); - (0..rolegroup.replicas.unwrap_or(0)).map(move |i| KafkaPodRef { - namespace: ns.clone(), - role_group_service_name: rolegroup_ref.object_name(), - pod_name: format!("{}-{}", rolegroup_ref.object_name(), i), - }) - })) + pub fn pod_descriptors( + &self, + requested_kafka_role: &KafkaRole, + cluster_info: &KubernetesClusterInfo, + ) -> Result<Vec<KafkaPodDescriptor>, Error> { + let namespace = self.metadata.namespace.clone().context(NoNamespaceSnafu)?; + let mut pod_descriptors = Vec::new(); + let mut seen_hashes = HashMap::<u32, (KafkaRole, String)>::new(); + + for current_role in KafkaRole::roles() { + let rolegroup_replicas = self.extract_rolegroup_replicas(&current_role)?; + for (rolegroup, replicas) in rolegroup_replicas { + let rolegroup_ref = self.rolegroup_ref(&current_role, &rolegroup); + let node_id_hash_offset = node_id_hash32_offset(&rolegroup_ref); + + // check collisions + match seen_hashes.get(&node_id_hash_offset) { + Some((colliding_role, colliding_rolegroup)) => { + return KafkaNodeIdHashCollisionSnafu { + role: current_role.clone(), + rolegroup: rolegroup.clone(), + colliding_role: colliding_role.clone(), + colliding_rolegroup: colliding_rolegroup.to_string(), + } + .fail(); + } + None => { + seen_hashes.insert(node_id_hash_offset, (current_role.clone(), rolegroup)) + } + }; + + // only return descriptors for the requested role + if current_role == *requested_kafka_role { + for replica in 0..replicas { + pod_descriptors.push(KafkaPodDescriptor { + namespace: namespace.clone(), + role_group_service_name: rolegroup_ref.object_name(), + replica, + cluster_domain: cluster_info.cluster_domain.clone(), + node_id: node_id_hash_offset + u32::from(replica), + }); + } + } + } + } + + Ok(pod_descriptors) } - /// Retrieve and merge resource configs for role and role groups - pub fn merged_config( + fn extract_rolegroup_replicas( &self, - role: &KafkaRole, - rolegroup_ref: &RoleGroupRef<v1alpha1::KafkaCluster>, - ) -> Result<KafkaConfig, Error> { - // Initialize the result with all default values as baseline - let conf_defaults = KafkaConfig::default_config(&self.name_any(), role); - - // Retrieve role resource config - let role = self.role(role)?; - let mut conf_role = role.config.config.to_owned(); - - // Retrieve rolegroup specific resource config - let role_group = self.rolegroup(rolegroup_ref)?; - let mut conf_role_group = role_group.config.config.to_owned(); - - // Merge more specific configs into default config - // Hierarchy is: - // 1. RoleGroup - // 2. Role - // 3.
Default - conf_role.merge(&conf_defaults); - conf_role_group.merge(&conf_role); - - tracing::debug!("Merged config: {:?}", conf_role_group); - fragment::validate(conf_role_group).context(FragmentValidationFailureSnafu) + kafka_role: &KafkaRole, + ) -> Result<BTreeMap<String, u16>, Error> { + Ok(match kafka_role { + KafkaRole::Broker => self + .broker_role() + .iter() + .flat_map(|role| &role.role_groups) + .flat_map(|(rolegroup_name, rolegroup)| { + std::iter::once((rolegroup_name.to_string(), rolegroup.replicas.unwrap_or(0))) + }) + // Order rolegroups consistently, to avoid spurious downstream rewrites + .collect::<BTreeMap<_, _>>(), + + KafkaRole::Controller => self + .controller_role() + .iter() + .flat_map(|role| &role.role_groups) + .flat_map(|(rolegroup_name, rolegroup)| { + std::iter::once((rolegroup_name.to_string(), rolegroup.replicas.unwrap_or(0))) + }) + // Order rolegroups consistently, to avoid spurious downstream rewrites + .collect::<BTreeMap<_, _>>(), + }) } } @@ -297,240 +339,54 @@ impl v1alpha1::KafkaCluster { /// /// Used for service discovery. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct KafkaPodRef { - pub namespace: String, - pub role_group_service_name: String, - pub pod_name: String, +pub struct KafkaPodDescriptor { + namespace: String, + role_group_service_name: String, + replica: u16, + cluster_domain: DomainName, + node_id: u32, } -impl KafkaPodRef { - pub fn fqdn(&self, cluster_info: &KubernetesClusterInfo) -> String { +impl KafkaPodDescriptor { + /// Return the fully qualified domain name + /// Format: `<pod-name>.<service-name>.<namespace>.svc.<cluster-domain>` + pub fn fqdn(&self) -> String { format!( "{pod_name}.{service_name}.{namespace}.svc.{cluster_domain}", - pod_name = self.pod_name, + pod_name = self.pod_name(), service_name = self.role_group_service_name, namespace = self.namespace, - cluster_domain = cluster_info.cluster_domain + cluster_domain = self.cluster_domain ) } -} - -#[derive( - Clone, - Debug, - Deserialize, - Display, - EnumIter, - Eq, - Hash, - JsonSchema, - PartialEq, - Serialize, - EnumString, -)] -pub enum KafkaRole { - #[strum(serialize = "broker")] - Broker, -} - -impl KafkaRole { - /// Metadata about a rolegroup - pub fn rolegroup_ref( - &self, - kafka: &v1alpha1::KafkaCluster, - group_name: impl Into<String>, - ) -> RoleGroupRef<v1alpha1::KafkaCluster> { - RoleGroupRef { - cluster: ObjectRef::from_obj(kafka), - role: self.to_string(), - role_group: group_name.into(), - } - } - - pub fn roles() -> Vec<String> { - let mut roles = vec![]; - for role in Self::iter() { - roles.push(role.to_string()) - } - roles - } - - /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. - /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, - /// but is similar to HBase).
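For illustration, the FQDN and pod name the descriptor above resolves to, as a standalone sketch; the service name, namespace, and cluster domain are made up:

```rust
fn pod_name(role_group_service_name: &str, replica: u16) -> String {
    // StatefulSet pods are named `{service}-{ordinal}`.
    format!("{role_group_service_name}-{replica}")
}

fn fqdn(role_group_service_name: &str, replica: u16, namespace: &str, cluster_domain: &str) -> String {
    format!(
        "{pod_name}.{role_group_service_name}.{namespace}.svc.{cluster_domain}",
        pod_name = pod_name(role_group_service_name, replica),
    )
}

fn main() {
    assert_eq!(
        fqdn("simple-kafka-controller-default", 1, "default", "cluster.local"),
        "simple-kafka-controller-default-1.simple-kafka-controller-default.default.svc.cluster.local"
    );
}
```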
- pub fn kerberos_service_name(&self) -> &'static str { - "kafka" - } -} - -#[derive(Clone, Debug, Default, PartialEq, Fragment, JsonSchema)] -#[fragment_attrs( - derive( - Clone, - Debug, - Default, - Deserialize, - JsonSchema, - Merge, - PartialEq, - Serialize - ), - serde(rename_all = "camelCase") -)] -pub struct Storage { - #[fragment_attrs(serde(default))] - pub log_dirs: PvcConfig, -} - -impl Storage { - pub fn build_pvcs(&self) -> Vec { - let data_pvc = self - .log_dirs - .build_pvc(LOG_DIRS_VOLUME_NAME, Some(vec!["ReadWriteOnce"])); - vec![data_pvc] - } -} - -#[derive( - Clone, - Debug, - Deserialize, - Display, - Eq, - EnumIter, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -#[serde(rename_all = "kebab-case")] -#[strum(serialize_all = "kebab-case")] -pub enum Container { - Vector, - KcatProber, - GetService, - Kafka, -} - -#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] -#[fragment_attrs( - derive( - Clone, - Debug, - Default, - Deserialize, - JsonSchema, - Merge, - PartialEq, - Serialize - ), - serde(rename_all = "camelCase") -)] -pub struct KafkaConfig { - #[fragment_attrs(serde(default))] - pub logging: Logging, - - #[fragment_attrs(serde(default))] - pub resources: Resources, - - #[fragment_attrs(serde(default))] - pub affinity: StackableAffinity, - - /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. - #[fragment_attrs(serde(default))] - pub graceful_shutdown_timeout: Option, - - /// The ListenerClass used for bootstrapping new clients. Should use a stable ListenerClass to avoid unnecessary client restarts (such as `cluster-internal` or `external-stable`). - pub bootstrap_listener_class: String, - - /// The ListenerClass used for connecting to brokers. Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). - pub broker_listener_class: String, - - /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. - /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
- #[fragment_attrs(serde(default))] - pub requested_secret_lifetime: Option, -} - -impl KafkaConfig { - // Auto TLS certificate lifetime - const DEFAULT_BROKER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); - - pub fn default_config(cluster_name: &str, role: &KafkaRole) -> KafkaConfigFragment { - KafkaConfigFragment { - logging: product_logging::spec::default_logging(), - resources: ResourcesFragment { - cpu: CpuLimitsFragment { - min: Some(Quantity("250m".to_owned())), - max: Some(Quantity("1000m".to_owned())), - }, - memory: MemoryLimitsFragment { - limit: Some(Quantity("1Gi".to_owned())), - runtime_limits: NoRuntimeLimitsFragment {}, - }, - storage: StorageFragment { - log_dirs: PvcConfigFragment { - capacity: Some(Quantity("2Gi".to_owned())), - storage_class: None, - selectors: None, - }, - }, - }, - affinity: get_affinity(cluster_name, role), - graceful_shutdown_timeout: Some(DEFAULT_BROKER_GRACEFUL_SHUTDOWN_TIMEOUT), - bootstrap_listener_class: Some("cluster-internal".to_string()), - broker_listener_class: Some("cluster-internal".to_string()), - requested_secret_lifetime: Some(Self::DEFAULT_BROKER_SECRET_LIFETIME), - } - } -} - -impl Configuration for KafkaConfigFragment { - type Configurable = v1alpha1::KafkaCluster; - fn compute_env( - &self, - _resource: &Self::Configurable, - _role_name: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - Ok(BTreeMap::new()) + pub fn pod_name(&self) -> String { + format!("{}-{}", self.role_group_service_name, self.replica) } - fn compute_cli( - &self, - _resource: &Self::Configurable, - _role_name: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - Ok(BTreeMap::new()) + /// Build the Kraft voter String + /// See: + /// Example: 0@controller-0:1234:0000000000-00000000000 + /// * 0 is the replica id + /// * 0000000000-00000000000 is the replica directory id (even though the used Uuid states to be type 4 it does not work) + /// See: + /// * controller-0 is the replica's host, + /// * 1234 is the replica's port. + // NOTE(@maltesander): Even though the used Uuid states to be type 4 it does not work... 0000000000-00000000000 works... 
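A runnable sketch of the voter-entry format described in the NOTE above (node id, host, and port are made up): the entry is `{id}@{host}:{port}:{directory-id}`, where the directory id is written as the fixed `0000000000-` prefix followed by the node id zero-padded to eleven digits:

```rust
fn as_voter(node_id: u32, fqdn: &str, port: u16) -> String {
    // `{node_id:0>11}` left-pads the id with zeros to a width of 11.
    format!("{node_id}@{fqdn}:{port}:0000000000-{node_id:0>11}")
}

fn main() {
    assert_eq!(
        as_voter(100, "controller-0.example.svc", 9093),
        "100@controller-0.example.svc:9093:0000000000-00000000100"
    );
}
```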
+ pub fn as_voter(&self, port: u16) -> String { + format!( + "{node_id}@{fqdn}:{port}:0000000000-{node_id:0>11}", + node_id = self.node_id, + fqdn = self.fqdn(), + ) } - fn compute_files( - &self, - resource: &Self::Configurable, - _role_name: &str, - file: &str, - ) -> Result>, stackable_operator::product_config_utils::Error> - { - let mut config = BTreeMap::new(); - - if file == SERVER_PROPERTIES_FILE { - // OPA - if resource.spec.cluster_config.authorization.opa.is_some() { - config.insert( - "authorizer.class.name".to_string(), - Some("org.openpolicyagent.kafka.OpaAuthorizer".to_string()), - ); - config.insert( - "opa.authorizer.metrics.enabled".to_string(), - Some("true".to_string()), - ); - } - } - - Ok(config) + pub fn as_quorum_voter(&self, port: u16) -> String { + format!( + "{node_id}@{fqdn}:{port}", + node_id = self.node_id, + fqdn = self.fqdn(), + ) } } diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs new file mode 100644 index 00000000..837ddd80 --- /dev/null +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -0,0 +1,172 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::resources::{ + CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, + PvcConfigFragment, Resources, ResourcesFragment, + }, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::apimachinery::pkg::api::resource::Quantity, + product_config_utils::Configuration, + product_logging::{self, spec::Logging}, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter}; + +use crate::crd::{ + listener::KafkaListenerName, + role::{ + KAFKA_LOG_DIRS, KAFKA_PROCESS_ROLES, KafkaRole, + commons::{CommonConfig, Storage, StorageFragment}, + }, + v1alpha1, +}; + +pub const BROKER_PROPERTIES_FILE: &str = "broker.properties"; + +#[derive( + Clone, + Debug, + Deserialize, + Display, + Eq, + EnumIter, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum BrokerContainer { + Vector, + KcatProber, + GetService, + Kafka, +} + +#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct BrokerConfig { + #[fragment_attrs(serde(flatten))] + pub common_config: CommonConfig, + + /// The ListenerClass used for bootstrapping new clients. Should use a stable ListenerClass to avoid unnecessary client restarts (such as `cluster-internal` or `external-stable`). + pub bootstrap_listener_class: String, + + /// The ListenerClass used for connecting to brokers. Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). 
+ pub broker_listener_class: String, + + #[fragment_attrs(serde(default))] + pub logging: Logging, + + #[fragment_attrs(serde(default))] + pub resources: Resources, +} + +impl BrokerConfig { + pub fn default_config(cluster_name: &str, role: &str) -> BrokerConfigFragment { + BrokerConfigFragment { + common_config: CommonConfig::default_config(cluster_name, role), + bootstrap_listener_class: Some("cluster-internal".to_string()), + broker_listener_class: Some("cluster-internal".to_string()), + logging: product_logging::spec::default_logging(), + resources: ResourcesFragment { + cpu: CpuLimitsFragment { + min: Some(Quantity("250m".to_owned())), + max: Some(Quantity("1000m".to_owned())), + }, + memory: MemoryLimitsFragment { + limit: Some(Quantity("2Gi".to_owned())), + runtime_limits: NoRuntimeLimitsFragment {}, + }, + storage: StorageFragment { + log_dirs: PvcConfigFragment { + capacity: Some(Quantity("2Gi".to_owned())), + storage_class: None, + selectors: None, + }, + }, + }, + } + } +} + +impl Configuration for BrokerConfigFragment { + type Configurable = v1alpha1::KafkaCluster; + + fn compute_env( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_cli( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_files( + &self, + resource: &Self::Configurable, + _role_name: &str, + file: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + let mut config = BTreeMap::new(); + + if file == BROKER_PROPERTIES_FILE { + config.insert( + KAFKA_LOG_DIRS.to_string(), + Some("/stackable/data/topicdata".to_string()), + ); + + // KRAFT + if resource.is_controller_configured() { + config.insert( + KAFKA_PROCESS_ROLES.to_string(), + Some(KafkaRole::Broker.to_string()), + ); + + config.insert( + "controller.listener.names".to_string(), + Some(KafkaListenerName::Controller.to_string()), + ); + } + // OPA + if resource.spec.cluster_config.authorization.opa.is_some() { + config.insert( + "authorizer.class.name".to_string(), + Some("org.openpolicyagent.kafka.OpaAuthorizer".to_string()), + ); + config.insert( + "opa.authorizer.metrics.enabled".to_string(), + Some("true".to_string()), + ); + } + } + + Ok(config) + } +} diff --git a/rust/operator-binary/src/crd/role/commons.rs b/rust/operator-binary/src/crd/role/commons.rs new file mode 100644 index 00000000..1ef6f3dc --- /dev/null +++ b/rust/operator-binary/src/crd/role/commons.rs @@ -0,0 +1,82 @@ +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::{affinity::StackableAffinity, resources::PvcConfig}, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::api::core::v1::PersistentVolumeClaim, + schemars::{self, JsonSchema}, + shared::time::Duration, +}; + +use crate::crd::affinity::get_affinity; + +#[derive(Clone, Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct Storage { + #[fragment_attrs(serde(default))] + pub log_dirs: PvcConfig, +} + +impl Storage { + pub const LOG_DIRS_VOLUME_NAME: &str = "log-dirs"; + + pub fn build_pvcs(&self) -> Vec { + let data_pvc = self + .log_dirs + .build_pvc(Self::LOG_DIRS_VOLUME_NAME, Some(vec!["ReadWriteOnce"])); + vec![data_pvc] + } +} + +#[derive(Clone, Debug, Default, 
Fragment, JsonSchema, PartialEq)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + Merge, + JsonSchema, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct CommonConfig { + #[fragment_attrs(serde(default))] + pub affinity: StackableAffinity, + + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + #[fragment_attrs(serde(default))] + pub graceful_shutdown_timeout: Option, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, +} + +impl CommonConfig { + const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(30); + // Auto TLS certificate lifetime + const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); + + pub fn default_config(cluster_name: &str, role: &str) -> CommonConfigFragment { + CommonConfigFragment { + affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(Self::DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), + } + } +} diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs new file mode 100644 index 00000000..9be5464f --- /dev/null +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -0,0 +1,149 @@ +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; +use stackable_operator::{ + commons::resources::{ + CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, + PvcConfigFragment, Resources, ResourcesFragment, + }, + config::{fragment::Fragment, merge::Merge}, + k8s_openapi::apimachinery::pkg::api::resource::Quantity, + product_config_utils::Configuration, + product_logging::{self, spec::Logging}, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter}; + +use crate::crd::{ + listener::KafkaListenerName, + role::{ + KAFKA_LOG_DIRS, KAFKA_PROCESS_ROLES, KafkaRole, + commons::{CommonConfig, Storage, StorageFragment}, + }, + v1alpha1, +}; + +pub const CONTROLLER_PROPERTIES_FILE: &str = "controller.properties"; + +#[derive( + Clone, + Debug, + Deserialize, + Display, + Eq, + EnumIter, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] +pub enum ControllerContainer { + Vector, + Kafka, +} + +#[derive(Debug, Default, PartialEq, Fragment, JsonSchema)] +#[fragment_attrs( + derive( + Clone, + Debug, + Default, + Deserialize, + JsonSchema, + Merge, + PartialEq, + Serialize + ), + serde(rename_all = "camelCase") +)] +pub struct ControllerConfig { + #[fragment_attrs(serde(flatten))] + pub common_config: CommonConfig, + + #[fragment_attrs(serde(default))] + pub logging: Logging, + + #[fragment_attrs(serde(default))] + pub resources: Resources, +} + +impl ControllerConfig { + pub fn default_config(cluster_name: &str, role: &str) -> ControllerConfigFragment { + ControllerConfigFragment { + common_config: CommonConfig::default_config(cluster_name, role), + logging: product_logging::spec::default_logging(), + resources: ResourcesFragment { + cpu: CpuLimitsFragment { + min: Some(Quantity("250m".to_owned())), + max: Some(Quantity("1000m".to_owned())), + }, + memory: 
MemoryLimitsFragment { + limit: Some(Quantity("1Gi".to_owned())), + runtime_limits: NoRuntimeLimitsFragment {}, + }, + storage: StorageFragment { + log_dirs: PvcConfigFragment { + capacity: Some(Quantity("2Gi".to_owned())), + storage_class: None, + selectors: None, + }, + }, + }, + } + } +} + +impl Configuration for ControllerConfigFragment { + type Configurable = v1alpha1::KafkaCluster; + + fn compute_env( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_cli( + &self, + _resource: &Self::Configurable, + _role_name: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + Ok(BTreeMap::new()) + } + + fn compute_files( + &self, + _resource: &Self::Configurable, + _role_name: &str, + file: &str, + ) -> Result>, stackable_operator::product_config_utils::Error> + { + let mut config = BTreeMap::new(); + + if file == CONTROLLER_PROPERTIES_FILE { + config.insert( + KAFKA_LOG_DIRS.to_string(), + Some("/stackable/data/kraft".to_string()), + ); + + // KRAFT + config.insert( + KAFKA_PROCESS_ROLES.to_string(), + Some(KafkaRole::Controller.to_string()), + ); + + config.insert( + "controller.listener.names".to_string(), + Some(KafkaListenerName::Controller.to_string()), + ); + } + + Ok(config) + } +} diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs new file mode 100644 index 00000000..06ae0b43 --- /dev/null +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -0,0 +1,448 @@ +pub mod broker; +pub mod commons; +pub mod controller; + +use std::{borrow::Cow, ops::Deref}; + +use serde::{Deserialize, Serialize}; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + commons::resources::{NoRuntimeLimits, Resources}, + config::{ + fragment::{self, ValidationError}, + merge::Merge, + }, + k8s_openapi::api::core::v1::PodTemplateSpec, + kube::{ResourceExt, runtime::reflector::ObjectRef}, + product_logging::spec::ContainerLogConfig, + role_utils::RoleGroupRef, + schemars::{self, JsonSchema}, +}; +use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; + +use crate::{ + config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, + crd::role::{ + broker::{BROKER_PROPERTIES_FILE, BrokerConfig, BrokerConfigFragment}, + commons::{CommonConfig, Storage}, + controller::{CONTROLLER_PROPERTIES_FILE, ControllerConfig}, + }, + v1alpha1, +}; + +/// Env var +pub const KAFKA_NODE_ID_OFFSET: &str = "NODE_ID_OFFSET"; + +// See: https://kafka.apache.org/documentation/#brokerconfigs +/// The node ID associated with the roles this process is playing when process.roles is non-empty. +/// This is required configuration when running in KRaft mode. +pub const KAFKA_NODE_ID: &str = "node.id"; + +/// The roles that this process plays: 'broker', 'controller', or 'broker,controller' if it is both. +pub const KAFKA_PROCESS_ROLES: &str = "process.roles"; + +/// A comma-separated list of the directories where the topic data is stored. +pub const KAFKA_LOG_DIRS: &str = "log.dirs"; + +/// Listener List - Comma-separated list of URIs we will listen on and the listener names. +/// If the listener name is not a security protocol, listener.security.protocol.map must also be set. +pub const KAFKA_LISTENERS: &str = "listeners"; + +/// Specifies the listener addresses that the Kafka brokers will advertise to clients and other brokers. 
+/// The config is useful where the actual listener configuration 'listeners' does not represent the addresses that clients should use to connect, +/// such as in cloud environments. The addresses are published to and managed by the controller; the brokers pull this data from the controller as needed. +/// In IaaS environments, this may need to be different from the interface to which the broker binds. If this is not set, the value for 'listeners' will be used. +/// Unlike 'listeners', it is not valid to advertise the 0.0.0.0 meta-address. +/// Also unlike 'listeners', there can be duplicated ports in this property, so that one listener can be configured to advertise another listener's address. +/// This can be useful in some cases where external load balancers are used. +pub const KAFKA_ADVERTISED_LISTENERS: &str = "advertised.listeners"; + +/// Map between listener names and security protocols. This must be defined for the same security protocol to be usable in more than one port or IP. +/// For example, internal and external traffic can be separated even if SSL is required for both. +/// Concretely, the user could define listeners with names INTERNAL and EXTERNAL and this property as: INTERNAL:SSL,EXTERNAL:SSL +pub const KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: &str = "listener.security.protocol.map"; + +/// List of endpoints to use for bootstrapping the cluster metadata. The endpoints are specified as a comma-separated list of {host}:{port} entries. +/// For example: localhost:9092,localhost:9093,localhost:9094. +pub const KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS: &str = "controller.quorum.bootstrap.servers"; + +/// Map of id/endpoint information for the set of voters in a comma-separated list of {id}@{host}:{port} entries. +/// For example: 1@localhost:9092,2@localhost:9093,3@localhost:9094 +pub const KAFKA_CONTROLLER_QUORUM_VOTERS: &str = "controller.quorum.voters"; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("fragment validation failure"))] + FragmentValidationFailure { source: ValidationError }, + + #[snafu(display("the Kafka role [{role}] is missing from spec"))] + MissingRole { + source: crate::crd::Error, + role: String, + }, + + #[snafu(display("missing role group {rolegroup:?} for role {role:?}"))] + MissingRoleGroup { role: String, rolegroup: String }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::config::jvm::Error }, +} + +#[derive( + Clone, + Debug, + Deserialize, + Display, + EnumIter, + Eq, + Hash, + JsonSchema, + PartialEq, + Serialize, + EnumString, +)] +pub enum KafkaRole { + #[strum(serialize = "broker")] + Broker, + #[strum(serialize = "controller")] + Controller, +} + +impl KafkaRole { + /// Return all available roles + pub fn roles() -> Vec<KafkaRole> { + let mut roles = vec![]; + for role in Self::iter() { + roles.push(role) + } + roles + } + + /// Metadata about a rolegroup + pub fn rolegroup_ref( + &self, + kafka: &v1alpha1::KafkaCluster, + group_name: impl Into<String>, + ) -> RoleGroupRef<v1alpha1::KafkaCluster> { + RoleGroupRef { + cluster: ObjectRef::from_obj(kafka), + role: self.to_string(), + role_group: group_name.into(), + } + } + + /// A Kerberos principal has three parts, with the form username/fully.qualified.domain.name@YOUR-REALM.COM. + /// We only have one role and will use "kafka" everywhere (which e.g. differs from the current hdfs implementation, + /// but is similar to HBase). + pub fn kerberos_service_name(&self) -> &'static str { + "kafka" + } + + /// Merge the [Broker|Controller]ConfigFragment defaults, role and role group settings.
+ /// The priority is: default < role config < role_group config + pub fn merged_config( + &self, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + match self { + Self::Broker => { + // Initialize the result with all default values as baseline + let default_config = + BrokerConfig::default_config(&kafka.name_any(), &self.to_string()); + + // Retrieve role resource config + let role = kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?; + + let mut role_config = role.config.config.clone(); + // Retrieve rolegroup specific resource config + let mut role_group_config = role + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .config + .clone(); + + // Merge more specific configs into default config + // Hierarchy is: + // 1. RoleGroup + // 2. Role + // 3. Default + role_config.merge(&default_config); + role_group_config.merge(&role_config); + Ok(AnyConfig::Broker( + fragment::validate::(role_group_config) + .context(FragmentValidationFailureSnafu)?, + )) + } + Self::Controller => { + // Initialize the result with all default values as baseline + let default_config = + ControllerConfig::default_config(&kafka.name_any(), &self.to_string()); + + // Retrieve role resource config + let role = kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?; + + let mut role_config = role.config.config.clone(); + // Retrieve rolegroup specific resource config + let mut role_group_config = role + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .config + .clone(); + + // Merge more specific configs into default config + // Hierarchy is: + // 1. RoleGroup + // 2. Role + // 3. Default + role_config.merge(&default_config); + role_group_config.merge(&role_config); + Ok(AnyConfig::Controller( + fragment::validate::(role_group_config) + .context(FragmentValidationFailureSnafu)?, + )) + } + } + } + + pub fn construct_non_heap_jvm_args( + &self, + merged_config: &AnyConfig, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + match self { + Self::Broker => construct_non_heap_jvm_args::( + merged_config, + kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + Self::Controller => construct_non_heap_jvm_args( + merged_config, + kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + } + } + + pub fn construct_heap_jvm_args( + &self, + merged_config: &AnyConfig, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + match self { + Self::Broker => construct_heap_jvm_args::( + merged_config, + kafka.broker_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + Self::Controller => construct_heap_jvm_args( + merged_config, + kafka.controller_role().with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })?, + rolegroup, + ) + .context(ConstructJvmArgumentsSnafu), + } + } + + pub fn role_pod_overrides( + &self, + kafka: &v1alpha1::KafkaCluster, + ) -> Result { + let pod_overrides = match self { + Self::Broker => kafka + .broker_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? 
+ .config + .pod_overrides + .clone(), + Self::Controller => kafka + .controller_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? + .config + .pod_overrides + .clone(), + }; + + Ok(pod_overrides) + } + + pub fn role_group_pod_overrides( + &self, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result { + let pod_overrides = match self { + Self::Broker => kafka + .broker_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .pod_overrides + .clone(), + Self::Controller => kafka + .controller_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .config + .pod_overrides + .clone(), + }; + + Ok(pod_overrides) + } + + pub fn replicas( + &self, + kafka: &v1alpha1::KafkaCluster, + rolegroup: &str, + ) -> Result, Error> { + let replicas = match self { + Self::Broker => { + kafka + .broker_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .replicas + } + Self::Controller => { + kafka + .controller_role() + .with_context(|_| MissingRoleSnafu { + role: self.to_string(), + })? + .role_groups + .get(rolegroup) + .with_context(|| MissingRoleGroupSnafu { + role: self.to_string(), + rolegroup: rolegroup.to_string(), + })? + .replicas + } + }; + + Ok(replicas) + } +} + +/// Configuration for a role and rolegroup of an unknown type. +#[derive(Debug)] +pub enum AnyConfig { + Broker(BrokerConfig), + Controller(ControllerConfig), +} + +impl Deref for AnyConfig { + type Target = CommonConfig; + + fn deref(&self) -> &Self::Target { + match self { + AnyConfig::Broker(broker_config) => &broker_config.common_config, + AnyConfig::Controller(controller_config) => &controller_config.common_config, + } + } +} + +impl AnyConfig { + pub fn resources(&self) -> &Resources { + match self { + AnyConfig::Broker(broker_config) => &broker_config.resources, + AnyConfig::Controller(controller_config) => &controller_config.resources, + } + } + + // Logging config is distinct between each role, due to the different enum types, + // so provide helpers for containers that are common between all roles. 
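The `Deref` to `CommonConfig` above is what lets call sites read shared settings off an `AnyConfig` without matching on the role first; a simplified, self-contained sketch of the pattern (field names shortened, not the operator's real structs):

```rust
use std::ops::Deref;

struct CommonConfig {
    graceful_shutdown_timeout_secs: u64,
}

struct BrokerConfig {
    common_config: CommonConfig,
}

struct ControllerConfig {
    common_config: CommonConfig,
}

enum AnyConfig {
    Broker(BrokerConfig),
    Controller(ControllerConfig),
}

impl Deref for AnyConfig {
    type Target = CommonConfig;

    // Both variants expose the same embedded common section.
    fn deref(&self) -> &CommonConfig {
        match self {
            AnyConfig::Broker(config) => &config.common_config,
            AnyConfig::Controller(config) => &config.common_config,
        }
    }
}

fn main() {
    let config = AnyConfig::Controller(ControllerConfig {
        common_config: CommonConfig {
            graceful_shutdown_timeout_secs: 1800,
        },
    });
    // Auto-deref: shared fields need no match on the role.
    assert_eq!(config.graceful_shutdown_timeout_secs, 1800);
}
```

Role-specific settings (logging, resources, the broker-only listener classes) stay behind explicit accessor methods, since their types differ per variant.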
+ pub fn kafka_logging(&self) -> Cow { + match self { + AnyConfig::Broker(node) => node.logging.for_container(&broker::BrokerContainer::Kafka), + AnyConfig::Controller(node) => node + .logging + .for_container(&controller::ControllerContainer::Kafka), + } + } + + pub fn vector_logging(&self) -> Cow { + match &self { + AnyConfig::Broker(broker_config) => broker_config + .logging + .for_container(&broker::BrokerContainer::Vector), + AnyConfig::Controller(controller_config) => controller_config + .logging + .for_container(&controller::ControllerContainer::Vector), + } + } + + pub fn vector_logging_enabled(&self) -> bool { + match self { + AnyConfig::Broker(broker_config) => broker_config.logging.enable_vector_agent, + AnyConfig::Controller(controller_config) => { + controller_config.logging.enable_vector_agent + } + } + } + + pub fn listener_class(&self) -> Option<&String> { + match self { + AnyConfig::Broker(broker_config) => Some(&broker_config.broker_listener_class), + AnyConfig::Controller(_) => None, + } + } + + pub fn config_file_name(&self) -> &str { + match self { + AnyConfig::Broker(_) => BROKER_PROPERTIES_FILE, + AnyConfig::Controller(_) => CONTROLLER_PROPERTIES_FILE, + } + } +} diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 4e846655..94fea587 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -6,7 +6,6 @@ //! This is required due to overlaps between TLS encryption and e.g. mTLS authentication or Kerberos use std::collections::BTreeMap; -use indoc::formatdoc; use snafu::{ResultExt, Snafu, ensure}; use stackable_operator::{ builder::{ @@ -20,20 +19,16 @@ use stackable_operator::{ client::Client, crd::authentication::core, k8s_openapi::api::core::v1::Volume, - product_logging::framework::{ - create_vector_shutdown_file_command, remove_vector_shutdown_file_command, - }, shared::time::Duration, - utils::COMMON_BASH_TRAP_FUNCTIONS, }; use super::listener::node_port_cmd; use crate::crd::{ - KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, SERVER_PROPERTIES_FILE, - STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_DIR, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_KRB5_PATH, + STACKABLE_LISTENER_BROKER_DIR, authentication::{self, ResolvedAuthenticationClasses}, - listener::{self, KafkaListenerConfig, node_address_cmd}, + listener::{self, KafkaListenerName, node_address_cmd}, + role::KafkaRole, tls, v1alpha1, }; @@ -73,59 +68,13 @@ impl KafkaTlsSecurity { // be able to expose principals for both the broker and bootstrap in the // JAAS configuration, so that clients can use both. 
pub const BOOTSTRAP_PORT_NAME: &'static str = "bootstrap"; - // - Bootstrapper - const BOOTSTRAP_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.bootstrap.ssl.keystore.location"; - const BOOTSTRAP_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.bootstrap.ssl.keystore.password"; - const BOOTSTRAP_SSL_KEYSTORE_TYPE: &'static str = "listener.name.bootstrap.ssl.keystore.type"; - const BOOTSTRAP_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.bootstrap.ssl.truststore.location"; - const BOOTSTRAP_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.bootstrap.ssl.truststore.password"; - const BOOTSTRAP_SSL_TRUSTSTORE_TYPE: &'static str = - "listener.name.bootstrap.ssl.truststore.type"; - const CLIENT_AUTH_SSL_CLIENT_AUTH: &'static str = "listener.name.client_auth.ssl.client.auth"; - // - TLS client authentication - const CLIENT_AUTH_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.client_auth.ssl.keystore.location"; - const CLIENT_AUTH_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.client_auth.ssl.keystore.password"; - const CLIENT_AUTH_SSL_KEYSTORE_TYPE: &'static str = - "listener.name.client_auth.ssl.keystore.type"; - const CLIENT_AUTH_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.client_auth.ssl.truststore.location"; - const CLIENT_AUTH_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.client_auth.ssl.truststore.password"; - const CLIENT_AUTH_SSL_TRUSTSTORE_TYPE: &'static str = - "listener.name.client_auth.ssl.truststore.type"; pub const CLIENT_PORT: u16 = 9092; // ports pub const CLIENT_PORT_NAME: &'static str = "kafka"; - // - TLS client - const CLIENT_SSL_KEYSTORE_LOCATION: &'static str = "listener.name.client.ssl.keystore.location"; - const CLIENT_SSL_KEYSTORE_PASSWORD: &'static str = "listener.name.client.ssl.keystore.password"; - const CLIENT_SSL_KEYSTORE_TYPE: &'static str = "listener.name.client.ssl.keystore.type"; - const CLIENT_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.client.ssl.truststore.location"; - const CLIENT_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.client.ssl.truststore.password"; - const CLIENT_SSL_TRUSTSTORE_TYPE: &'static str = "listener.name.client.ssl.truststore.type"; // internal pub const INTERNAL_PORT: u16 = 19092; // - TLS internal const INTER_BROKER_LISTENER_NAME: &'static str = "inter.broker.listener.name"; - const INTER_SSL_CLIENT_AUTH: &'static str = "listener.name.internal.ssl.client.auth"; - const INTER_SSL_KEYSTORE_LOCATION: &'static str = - "listener.name.internal.ssl.keystore.location"; - const INTER_SSL_KEYSTORE_PASSWORD: &'static str = - "listener.name.internal.ssl.keystore.password"; - const INTER_SSL_KEYSTORE_TYPE: &'static str = "listener.name.internal.ssl.keystore.type"; - const INTER_SSL_TRUSTSTORE_LOCATION: &'static str = - "listener.name.internal.ssl.truststore.location"; - const INTER_SSL_TRUSTSTORE_PASSWORD: &'static str = - "listener.name.internal.ssl.truststore.password"; - const INTER_SSL_TRUSTSTORE_TYPE: &'static str = "listener.name.internal.ssl.truststore.type"; pub const SECURE_BOOTSTRAP_PORT: u16 = 9095; pub const SECURE_CLIENT_PORT: u16 = 9093; pub const SECURE_CLIENT_PORT_NAME: &'static str = "kafka-tls"; @@ -350,53 +299,9 @@ impl KafkaTlsSecurity { args } - /// Returns the commands to start the main Kafka container - pub fn kafka_container_commands( - &self, - kafka_listeners: &KafkaListenerConfig, - opa_connect_string: Option<&str>, - kerberos_enabled: bool, - ) -> Vec { - vec![formatdoc! 
{" - {COMMON_BASH_TRAP_FUNCTIONS} - {remove_vector_shutdown_file_command} - prepare_signal_handlers - containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & - {set_realm_env} - bin/kafka-server-start.sh {STACKABLE_CONFIG_DIR}/{SERVER_PROPERTIES_FILE} --override \"zookeeper.connect=$ZOOKEEPER\" --override \"listeners={listeners}\" --override \"advertised.listeners={advertised_listeners}\" --override \"listener.security.protocol.map={listener_security_protocol_map}\"{opa_config}{jaas_config} & - wait_for_termination $! - {create_vector_shutdown_file_command} - ", - remove_vector_shutdown_file_command = - remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), - create_vector_shutdown_file_command = - create_vector_shutdown_file_command(STACKABLE_LOG_DIR), - set_realm_env = match kerberos_enabled { - true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {})", STACKABLE_KERBEROS_KRB5_PATH), - false => "".to_string(), - }, - listeners = kafka_listeners.listeners(), - advertised_listeners = kafka_listeners.advertised_listeners(), - listener_security_protocol_map = kafka_listeners.listener_security_protocol_map(), - opa_config = match opa_connect_string { - None => "".to_string(), - Some(opa_connect_string) => format!(" --override \"opa.authorizer.url={opa_connect_string}\""), - }, - jaas_config = match kerberos_enabled { - true => { - let service_name = KafkaRole::Broker.kerberos_service_name(); - let broker_address = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR); - let bootstrap_address = node_address_cmd(STACKABLE_LISTENER_BOOTSTRAP_DIR); - // TODO replace client and bootstrap below with constants - format!(" --override \"listener.name.client.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{broker_address}@$KERBEROS_REALM\\\";\" --override \"listener.name.bootstrap.gssapi.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true isInitiator=false keyTab=\\\"/stackable/kerberos/keytab\\\" principal=\\\"{service_name}/{bootstrap_address}@$KERBEROS_REALM\\\";\"").to_string()}, - false => "".to_string(), - }, - }] - } - - /// Adds required volumes and volume mounts to the pod and container builders + /// Adds required volumes and volume mounts to the broker pod and container builders /// depending on the tls and authentication settings. - pub fn add_volume_and_volume_mounts( + pub fn add_broker_volume_and_volume_mounts( &self, pod_builder: &mut PodBuilder, cb_kcat_prober: &mut ContainerBuilder, @@ -454,9 +359,43 @@ impl KafkaTlsSecurity { Ok(()) } - /// Returns required Kafka configuration settings for the `server.properties` file + /// Adds required volumes and volume mounts to the controller pod and container builders /// depending on the tls and authentication settings. 
- pub fn config_settings(&self) -> BTreeMap { + pub fn add_controller_volume_and_volume_mounts( + &self, + pod_builder: &mut PodBuilder, + cb_kafka: &mut ContainerBuilder, + requested_secret_lifetime: &Duration, + ) -> Result<(), Error> { + if let Some(tls_internal_secret_class) = self.tls_internal_secret_class() { + pod_builder + .add_volume( + VolumeBuilder::new(Self::STACKABLE_TLS_KAFKA_INTERNAL_VOLUME_NAME) + .ephemeral( + SecretOperatorVolumeSourceBuilder::new(tls_internal_secret_class) + .with_pod_scope() + .with_format(SecretFormat::TlsPkcs12) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) + .build() + .context(SecretVolumeBuildSnafu)?, + ) + .build(), + ) + .context(AddVolumeSnafu)?; + cb_kafka + .add_volume_mount( + Self::STACKABLE_TLS_KAFKA_INTERNAL_VOLUME_NAME, + Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR, + ) + .context(AddVolumeMountSnafu)?; + } + + Ok(()) + } + + /// Returns required Kafka configuration settings for the `broker.properties` file + /// depending on the tls and authentication settings. + pub fn broker_config_settings(&self) -> BTreeMap { let mut config = BTreeMap::new(); // We set either client tls with authentication or client tls without authentication @@ -464,57 +403,57 @@ impl KafkaTlsSecurity { // be trusted. if self.tls_client_authentication_class().is_some() { config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_AUTH_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_truststore_type(), "PKCS12".to_string(), ); // client auth required config.insert( - Self::CLIENT_AUTH_SSL_CLIENT_AUTH.to_string(), + KafkaListenerName::ClientAuth.listener_ssl_client_auth(), "required".to_string(), ); } else if self.tls_server_secret_class().is_some() { config.insert( - Self::CLIENT_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::CLIENT_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Client.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::CLIENT_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::Client.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::CLIENT_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Client.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - 
Self::CLIENT_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Client.listener_ssl_truststore_type(), "PKCS12".to_string(), ); } @@ -522,59 +461,86 @@ impl KafkaTlsSecurity { if self.has_kerberos_enabled() { // Bootstrap config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_SERVER_DIR), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::BOOTSTRAP_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Bootstrap.listener_ssl_truststore_type(), "PKCS12".to_string(), ); } // Internal TLS if self.tls_internal_secret_class().is_some() { + // BROKERS config.insert( - Self::INTER_SSL_KEYSTORE_LOCATION.to_string(), + KafkaListenerName::Internal.listener_ssl_keystore_location(), format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), ); config.insert( - Self::INTER_SSL_KEYSTORE_PASSWORD.to_string(), + KafkaListenerName::Internal.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::INTER_SSL_KEYSTORE_TYPE.to_string(), + KafkaListenerName::Internal.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_LOCATION.to_string(), + KafkaListenerName::Internal.listener_ssl_truststore_location(), format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_PASSWORD.to_string(), + KafkaListenerName::Internal.listener_ssl_truststore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_type(), + "PKCS12".to_string(), + ); + // CONTROLLERS + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_location(), + format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_keystore_password(), Self::SSL_STORE_PASSWORD.to_string(), ); config.insert( - Self::INTER_SSL_TRUSTSTORE_TYPE.to_string(), + KafkaListenerName::Controller.listener_ssl_keystore_type(), "PKCS12".to_string(), ); config.insert( - Self::INTER_SSL_CLIENT_AUTH.to_string(), + KafkaListenerName::Controller.listener_ssl_truststore_location(), + format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_truststore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Controller.listener_ssl_truststore_type(), + "PKCS12".to_string(), + ); + // client auth required + config.insert( + KafkaListenerName::Internal.listener_ssl_client_auth(), "required".to_string(), ); } @@ -602,6 +568,68 @@ impl KafkaTlsSecurity { config } + /// Returns required Kafka configuration settings for the `controller.properties` file + /// depending 
on the tls and authentication settings.
+    pub fn controller_config_settings(&self) -> BTreeMap<String, String> {
+        let mut config = BTreeMap::new();
+
+        // We set either client tls with authentication or client tls without authentication
+        // If authentication is explicitly required we do not want to have any other CAs to
+        // be trusted.
+        if self.tls_client_authentication_class().is_some() {
+            // client auth required
+            config.insert(
+                KafkaListenerName::ControllerAuth.listener_ssl_client_auth(),
+                "required".to_string(),
+            );
+        }
+
+        if self.tls_client_authentication_class().is_some()
+            || self.tls_internal_secret_class().is_some()
+        {
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_keystore_location(),
+                format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR),
+            );
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_keystore_password(),
+                Self::SSL_STORE_PASSWORD.to_string(),
+            );
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_keystore_type(),
+                "PKCS12".to_string(),
+            );
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_truststore_location(),
+                format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR),
+            );
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_truststore_password(),
+                Self::SSL_STORE_PASSWORD.to_string(),
+            );
+            config.insert(
+                KafkaListenerName::Controller.listener_ssl_truststore_type(),
+                "PKCS12".to_string(),
+            );
+        }
+
+        // Kerberos
+        if self.has_kerberos_enabled() {
+            config.insert("sasl.enabled.mechanisms".to_string(), "GSSAPI".to_string());
+            config.insert(
+                "sasl.kerberos.service.name".to_string(),
+                KafkaRole::Controller.kerberos_service_name().to_string(),
+            );
+            config.insert(
+                "sasl.mechanism.inter.broker.protocol".to_string(),
+                "GSSAPI".to_string(),
+            );
+            tracing::debug!("Kerberos configs added: [{:#?}]", config);
+        }
+
+        config
+    }
+
     /// Returns the `SecretClass` provided in an `AuthenticationClass` for TLS.
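A hypothetical unit test pinning down a couple of the keys `controller_config_settings` emits when internal TLS is active. Constructing a `KafkaTlsSecurity` fixture is elided (its constructor is not part of this diff), so this is a sketch rather than a ready-to-run test:

#[cfg(test)]
mod controller_config_settings_sketch {
    use super::*;

    #[test]
    fn controller_listener_gets_pkcs12_stores() {
        // Assumption: `security` has an internal TLS SecretClass set and Kerberos disabled.
        let security: KafkaTlsSecurity = todo!("construct from a KafkaCluster fixture");
        let config = security.controller_config_settings();

        assert_eq!(
            config.get("listener.name.controller.ssl.keystore.type"),
            Some(&"PKCS12".to_string()),
        );
        assert_eq!(
            config.get("listener.name.controller.ssl.truststore.type"),
            Some(&"PKCS12".to_string()),
        );
    }
}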
fn get_tls_secret_class(&self) -> Option<&String> { self.resolved_authentication_classes diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index 7a6d01b4..ca76eafd 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -5,12 +5,12 @@ use stackable_operator::{ builder::{configmap::ConfigMapBuilder, meta::ObjectMetaBuilder}, commons::product_image_selection::ResolvedProductImage, crd::listener, - k8s_openapi::api::core::v1::{ConfigMap, Service}, + k8s_openapi::api::core::v1::ConfigMap, kube::{Resource, ResourceExt, runtime::reflector::ObjectRef}, }; use crate::{ - crd::{KafkaRole, security::KafkaTlsSecurity, v1alpha1}, + crd::{role::KafkaRole, security::KafkaTlsSecurity, v1alpha1}, kafka_controller::KAFKA_CONTROLLER_NAME, utils::build_recommended_labels, }; @@ -26,21 +26,9 @@ pub enum Error { #[snafu(display("object has no name associated"))] NoName, - #[snafu(display("object has no namespace associated"))] - NoNamespace, - #[snafu(display("could not find service port with name {}", port_name))] NoServicePort { port_name: String }, - #[snafu(display("service port with name {} does not have a nodePort", port_name))] - NoNodePort { port_name: String }, - - #[snafu(display("could not find Endpoints for {}", svc))] - FindEndpoints { - source: stackable_operator::client::Error, - svc: ObjectRef, - }, - #[snafu(display("nodePort was out of range"))] InvalidNodePort { source: TryFromIntError }, diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 98e6240b..47b701a7 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -1,66 +1,28 @@ //! Ensures that `Pod`s are configured and running for each [`v1alpha1::KafkaCluster`]. 
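The reconciler below no longer hard-codes the broker role: it parses each key of the validated config back into a `KafkaRole` and dispatches per role. Reduced to its control flow, the loop looks roughly like this sketch (simplified types; the real loop, shown later in this file, builds StatefulSets, Services and ConfigMaps):

use std::{collections::HashMap, str::FromStr};

use strum::{Display, EnumString};

#[derive(Debug, Display, EnumString)]
#[strum(serialize_all = "lowercase")]
enum Role {
    Broker,
    Controller,
}

fn dispatch(validated: &HashMap<String, Vec<String>>) -> Result<(), strum::ParseError> {
    for (role_name, rolegroups) in validated {
        let role = Role::from_str(role_name)?;
        for group in rolegroups {
            match role {
                // Brokers additionally get a bootstrap Listener (see the reconcile loop).
                Role::Broker => println!("broker statefulset + bootstrap listener for {group}"),
                Role::Controller => println!("controller statefulset for {group}"),
            }
        }
    }
    Ok(())
}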
-use std::{ - borrow::Cow, - collections::{BTreeMap, HashMap}, - sync::Arc, -}; +use std::{collections::HashMap, str::FromStr, sync::Arc}; use const_format::concatcp; -use product_config::{ - ProductConfigManager, - types::PropertyNameKind, - writer::{PropertiesWriterError, to_java_properties_string}, -}; -use snafu::{OptionExt, ResultExt, Snafu}; +use product_config::{ProductConfigManager, types::PropertyNameKind}; +use snafu::{ResultExt, Snafu}; use stackable_operator::{ - builder::{ - self, - configmap::ConfigMapBuilder, - meta::ObjectMetaBuilder, - pod::{ - PodBuilder, - container::ContainerBuilder, - resources::ResourceRequirementsBuilder, - security::PodSecurityContextBuilder, - volume::{ListenerOperatorVolumeSourceBuilder, ListenerReference, VolumeBuilder}, - }, - }, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ opa::OpaApiVersion, - product_image_selection::{self, ResolvedProductImage}, + product_image_selection::{self}, rbac::build_rbac_resources, }, - crd::{authentication::core, listener}, - k8s_openapi::{ - DeepMerge, - api::{ - apps::v1::{StatefulSet, StatefulSetSpec}, - core::v1::{ - ConfigMap, ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, - EnvVarSource, ExecAction, ObjectFieldSelector, PodSpec, Probe, Service, - ServiceAccount, ServiceSpec, Volume, - }, - }, - apimachinery::pkg::apis::meta::v1::LabelSelector, - }, + crd::listener, kube::{ - Resource, ResourceExt, + Resource, api::DynamicObject, core::{DeserializeGuard, error_boundary}, runtime::{controller::Action, reflector::ObjectRef}, }, - kvp::{Label, Labels}, logging::controller::ReconcilerError, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, - product_logging::{ - self, - framework::LoggingError, - spec::{ - ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, - CustomContainerLogConfig, - }, + product_config_utils::{ + ValidatedRoleConfigByPropertyKind, transform_all_roles_to_config, + validate_all_roles_and_groups_config, }, role_utils::{GenericRoleConfig, RoleGroupRef}, shared::time::Duration, @@ -68,31 +30,28 @@ use stackable_operator::{ compute_conditions, operations::ClusterOperationsConditionBuilder, statefulset::StatefulSetConditionBuilder, }, - utils::cluster_info::KubernetesClusterInfo, }; use strum::{EnumDiscriminants, IntoStaticStr}; use crate::{ - config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - APP_NAME, Container, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KAFKA_HEAP_OPTS, - KafkaClusterStatus, KafkaConfig, KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, - LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, - OPERATOR_NAME, SERVER_PROPERTIES_FILE, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, - STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, STACKABLE_LOG_CONFIG_DIR, - STACKABLE_LOG_DIR, - listener::{KafkaListenerError, get_kafka_listener_config}, + self, APP_NAME, DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, KafkaClusterStatus, + OPERATOR_NAME, + role::{ + AnyConfig, KafkaRole, broker::BROKER_PROPERTIES_FILE, + controller::CONTROLLER_PROPERTIES_FILE, + }, security::KafkaTlsSecurity, v1alpha1, }, discovery::{self, build_discovery_configmap}, - kerberos::{self, add_kerberos_pod_config}, - operations::{ - graceful_shutdown::{add_graceful_shutdown_config, graceful_shutdown_config_properties}, - pdb::add_pdbs, + operations::pdb::add_pdbs, + resource::{ + configmap::build_rolegroup_config_map, + 
listener::build_broker_rolegroup_bootstrap_listener, + service::build_rolegroup_service, + statefulset::{build_broker_rolegroup_statefulset, build_controller_rolegroup_statefulset}, }, - product_logging::{LOG4J_CONFIG_FILE, MAX_KAFKA_LOG_FILES_SIZE, extend_role_group_config_map}, - utils::build_recommended_labels, }; pub const KAFKA_CONTROLLER_NAME: &str = "kafkacluster"; @@ -107,45 +66,23 @@ pub struct Ctx { #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { - #[snafu(display("missing secret lifetime"))] - MissingSecretLifetime, - - #[snafu(display("object has no name"))] - ObjectHasNoName, - - #[snafu(display("object has no namespace"))] - ObjectHasNoNamespace, - - #[snafu(display("object defines no broker role"))] - NoBrokerRole, + #[snafu(display("cluster object defines no '{role}' role"))] + MissingKafkaRole { + source: crate::crd::Error, + role: KafkaRole, + }, #[snafu(display("failed to apply role Service"))] ApplyRoleService { source: stackable_operator::cluster_resources::Error, }, - #[snafu(display("failed to apply role ServiceAccount"))] - ApplyRoleServiceAccount { - source: stackable_operator::cluster_resources::Error, - }, - - #[snafu(display("failed to apply global RoleBinding"))] - ApplyRoleRoleBinding { - source: stackable_operator::cluster_resources::Error, - }, - #[snafu(display("failed to apply Service for {}", rolegroup))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, rolegroup: RoleGroupRef, }, - #[snafu(display("failed to build ConfigMap for {}", rolegroup))] - BuildRoleGroupConfig { - source: stackable_operator::builder::configmap::Error, - rolegroup: RoleGroupRef, - }, - #[snafu(display("failed to apply ConfigMap for {}", rolegroup))] ApplyRoleGroupConfig { source: stackable_operator::cluster_resources::Error, @@ -168,17 +105,6 @@ pub enum Error { source: stackable_operator::product_config_utils::Error, }, - #[snafu(display("failed to serialize zoo.cfg for {}", rolegroup))] - SerializeZooCfg { - source: PropertiesWriterError, - rolegroup: RoleGroupRef, - }, - - #[snafu(display("object is missing metadata to build owner reference"))] - ObjectMissingMetadataForOwnerRef { - source: stackable_operator::builder::meta::Error, - }, - #[snafu(display("failed to build discovery ConfigMap"))] BuildDiscoveryConfig { source: discovery::Error }, @@ -187,49 +113,11 @@ pub enum Error { source: stackable_operator::cluster_resources::Error, }, - #[snafu(display("failed to find rolegroup {}", rolegroup))] - RoleGroupNotFound { - rolegroup: RoleGroupRef, - }, - #[snafu(display("invalid OpaConfig"))] InvalidOpaConfig { source: stackable_operator::commons::opa::Error, }, - #[snafu(display("failed to retrieve {}", authentication_class))] - AuthenticationClassRetrieval { - source: stackable_operator::commons::opa::Error, - authentication_class: ObjectRef, - }, - - #[snafu(display( - "failed to use authentication provider {} - supported methods: {:?}", - provider, - supported - ))] - AuthenticationProviderNotSupported { - authentication_class: ObjectRef, - supported: Vec, - provider: String, - }, - - #[snafu(display("invalid kafka listeners"))] - InvalidKafkaListeners { - source: crate::crd::listener::KafkaListenerError, - }, - - #[snafu(display("failed to add listener volume"))] - AddListenerVolume { - source: stackable_operator::builder::pod::Error, - }, - - #[snafu(display("invalid container name [{name}]"))] - InvalidContainerName { - name: String, - source: 
stackable_operator::builder::pod::container::Error, - }, - #[snafu(display("failed to delete orphaned resources"))] DeleteOrphans { source: stackable_operator::cluster_resources::Error, @@ -244,16 +132,7 @@ pub enum Error { }, #[snafu(display("failed to resolve and merge config for role and role group"))] - FailedToResolveConfig { source: crate::crd::Error }, - - #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] - VectorAggregatorConfigMapMissing, - - #[snafu(display("failed to add the logging configuration to the ConfigMap [{cm_name}]"))] - InvalidLoggingConfig { - source: crate::product_logging::Error, - cm_name: String, - }, + FailedToResolveConfig { source: crate::crd::role::Error }, #[snafu(display("failed to patch service account"))] ApplyServiceAccount { @@ -275,79 +154,55 @@ pub enum Error { source: stackable_operator::commons::rbac::Error, }, - #[snafu(display("internal operator failure"))] - InternalOperatorError { source: crate::crd::Error }, - - #[snafu(display( - "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", - rolegroup - ))] - JvmSecurityPoperties { - source: PropertiesWriterError, - rolegroup: String, - }, - #[snafu(display("failed to create PodDisruptionBudget"))] FailedToCreatePdb { source: crate::operations::pdb::Error, }, - #[snafu(display("failed to configure graceful shutdown"))] - GracefulShutdown { - source: crate::operations::graceful_shutdown::Error, - }, - #[snafu(display("failed to get required Labels"))] GetRequiredLabels { source: stackable_operator::kvp::KeyValuePairError, }, - #[snafu(display("failed to build Metadata"))] - MetadataBuild { - source: stackable_operator::builder::meta::Error, - }, - - #[snafu(display("failed to build Labels"))] - LabelBuild { - source: stackable_operator::kvp::LabelError, - }, - - #[snafu(display("failed to add Secret Volumes and VolumeMounts"))] - AddVolumesAndVolumeMounts { source: crate::crd::security::Error }, - - #[snafu(display("failed to resolve the fully-qualified pod name"))] - ResolveNamespace { source: KafkaListenerError }, - - #[snafu(display("failed to add kerberos config"))] - AddKerberosConfig { source: kerberos::Error }, - #[snafu(display("failed to validate authentication method"))] FailedToValidateAuthenticationMethod { source: crate::crd::security::Error }, - #[snafu(display("failed to add needed volume"))] - AddVolume { source: builder::pod::Error }, - - #[snafu(display("failed to add needed volumeMount"))] - AddVolumeMount { - source: builder::pod::container::Error, - }, - - #[snafu(display("failed to configure logging"))] - ConfigureLogging { source: LoggingError }, - #[snafu(display("KafkaCluster object is invalid"))] InvalidKafkaCluster { source: error_boundary::InvalidObject, }, - #[snafu(display("failed to construct JVM arguments"))] - ConstructJvmArguments { source: crate::config::jvm::Error }, + #[snafu(display("KafkaCluster object is misconfigured"))] + MisconfiguredKafkaCluster { source: crd::Error }, #[snafu(display("failed to resolve product image"))] ResolveProductImage { source: product_image_selection::Error, }, + + #[snafu(display("failed to parse role: {source}"))] + ParseRole { source: strum::ParseError }, + + #[snafu(display("failed to build statefulset"))] + BuildStatefulset { + source: crate::resource::statefulset::Error, + }, + + #[snafu(display("failed to build configmap"))] + BuildConfigMap { + source: crate::resource::configmap::Error, + }, + + #[snafu(display("failed to build service"))] + BuildService { + source: 
crate::resource::service::Error, + }, + + #[snafu(display("failed to build listener"))] + BuildListener { + source: crate::resource::listener::Error, + }, } type Result = std::result::Result; @@ -358,63 +213,35 @@ impl ReconcilerError for Error { fn secondary_object(&self) -> Option> { match self { - Error::MissingSecretLifetime => None, - Error::ObjectHasNoName => None, - Error::ObjectHasNoNamespace => None, - Error::NoBrokerRole => None, + Error::MissingKafkaRole { .. } => None, Error::ApplyRoleService { .. } => None, - Error::ApplyRoleServiceAccount { .. } => None, - Error::ApplyRoleRoleBinding { .. } => None, Error::ApplyRoleGroupService { .. } => None, - Error::BuildRoleGroupConfig { .. } => None, Error::ApplyRoleGroupConfig { .. } => None, Error::ApplyRoleGroupStatefulSet { .. } => None, Error::GenerateProductConfig { .. } => None, Error::InvalidProductConfig { .. } => None, - Error::SerializeZooCfg { .. } => None, - Error::ObjectMissingMetadataForOwnerRef { .. } => None, Error::BuildDiscoveryConfig { .. } => None, Error::ApplyDiscoveryConfig { .. } => None, - Error::RoleGroupNotFound { .. } => None, Error::InvalidOpaConfig { .. } => None, - Error::AuthenticationClassRetrieval { - authentication_class, - .. - } => Some(authentication_class.clone().erase()), - Error::AuthenticationProviderNotSupported { - authentication_class, - .. - } => Some(authentication_class.clone().erase()), - Error::InvalidKafkaListeners { .. } => None, - Error::AddListenerVolume { .. } => None, - Error::InvalidContainerName { .. } => None, Error::DeleteOrphans { .. } => None, Error::FailedToInitializeSecurityContext { .. } => None, Error::CreateClusterResources { .. } => None, Error::FailedToResolveConfig { .. } => None, - Error::VectorAggregatorConfigMapMissing => None, - Error::InvalidLoggingConfig { .. } => None, Error::ApplyServiceAccount { .. } => None, Error::ApplyRoleBinding { .. } => None, Error::ApplyStatus { .. } => None, Error::BuildRbacResources { .. } => None, - Error::InternalOperatorError { .. } => None, - Error::JvmSecurityPoperties { .. } => None, Error::FailedToCreatePdb { .. } => None, - Error::GracefulShutdown { .. } => None, Error::GetRequiredLabels { .. } => None, - Error::MetadataBuild { .. } => None, - Error::LabelBuild { .. } => None, - Error::AddVolumesAndVolumeMounts { .. } => None, - Error::ConfigureLogging { .. } => None, - Error::AddVolume { .. } => None, - Error::AddVolumeMount { .. } => None, - Error::ResolveNamespace { .. } => None, - Error::AddKerberosConfig { .. } => None, Error::FailedToValidateAuthenticationMethod { .. } => None, Error::InvalidKafkaCluster { .. } => None, - Error::ConstructJvmArguments { .. } => None, + Error::MisconfiguredKafkaCluster { .. } => None, Error::ResolveProductImage { .. } => None, + Error::ParseRole { .. } => None, + Error::BuildStatefulset { .. } => None, + Error::BuildConfigMap { .. } => None, + Error::BuildService { .. } => None, + Error::BuildListener { .. 
} => None, } } } @@ -432,7 +259,6 @@ pub async fn reconcile_kafka( .context(InvalidKafkaClusterSnafu)?; let client = &ctx.client; - let kafka_role = KafkaRole::Broker; let resolved_product_image = kafka .spec @@ -440,6 +266,11 @@ pub async fn reconcile_kafka( .resolve(DOCKER_IMAGE_BASE_NAME, crate::built_info::PKG_VERSION) .context(ResolveProductImageSnafu)?; + // check Kraft vs ZooKeeper and fail if misconfigured + kafka + .check_kraft_vs_zookeeper(&resolved_product_image.product_version) + .context(MisconfiguredKafkaClusterSnafu)?; + let mut cluster_resources = ClusterResources::new( APP_NAME, OPERATOR_NAME, @@ -449,33 +280,11 @@ pub async fn reconcile_kafka( ) .context(CreateClusterResourcesSnafu)?; - let validated_config = validate_all_roles_and_groups_config( + let validated_config = validated_product_config( + kafka, &resolved_product_image.product_version, - &transform_all_roles_to_config( - kafka, - [( - KafkaRole::Broker.to_string(), - ( - vec![ - PropertyNameKind::File(SERVER_PROPERTIES_FILE.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - PropertyNameKind::Env, - ], - kafka.spec.brokers.clone().context(NoBrokerRoleSnafu)?, - ), - )] - .into(), - ) - .context(GenerateProductConfigSnafu)?, &ctx.product_config, - false, - false, - ) - .context(InvalidProductConfigSnafu)?; - let role_broker_config = validated_config - .get(&KafkaRole::Broker.to_string()) - .map(Cow::Borrowed) - .unwrap_or_default(); + )?; let kafka_security = KafkaTlsSecurity::new_from_kafka_cluster(client, kafka) .await @@ -528,80 +337,107 @@ pub async fn reconcile_kafka( let mut bootstrap_listeners = Vec::::new(); - for (rolegroup_name, rolegroup_config) in role_broker_config.iter() { - let rolegroup_ref = kafka.broker_rolegroup_ref(rolegroup_name); - - let merged_config = kafka - .merged_config(&KafkaRole::Broker, &rolegroup_ref) - .context(FailedToResolveConfigSnafu)?; - - let rg_service = - build_broker_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)?; - let rg_configmap = build_broker_rolegroup_config_map( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - rolegroup_config, - &merged_config, - )?; - let rg_statefulset = build_broker_rolegroup_statefulset( - kafka, - &kafka_role, - &resolved_product_image, - &rolegroup_ref, - rolegroup_config, - opa_connect.as_deref(), - &kafka_security, - &merged_config, - &rbac_sa, - &client.kubernetes_cluster_info, - )?; - let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( - kafka, - &resolved_product_image, - &kafka_security, - &rolegroup_ref, - &merged_config, - )?; - - bootstrap_listeners.push( + for (kafka_role_str, role_config) in &validated_config { + let kafka_role = KafkaRole::from_str(kafka_role_str).context(ParseRoleSnafu)?; + + for (rolegroup_name, rolegroup_config) in role_config.iter() { + let rolegroup_ref = kafka.rolegroup_ref(&kafka_role, rolegroup_name); + + let merged_config = kafka_role + .merged_config(kafka, &rolegroup_ref.role_group) + .context(FailedToResolveConfigSnafu)?; + + let rg_service = + build_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref) + .context(BuildServiceSnafu)?; + + let rg_configmap = build_rolegroup_config_map( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + rolegroup_config, + &merged_config, + ) + .context(BuildConfigMapSnafu)?; + + let rg_statefulset = match kafka_role { + KafkaRole::Broker => build_broker_rolegroup_statefulset( + kafka, + &kafka_role, + &resolved_product_image, + 
&rolegroup_ref, + rolegroup_config, + opa_connect.as_deref(), + &kafka_security, + &merged_config, + &rbac_sa, + &client.kubernetes_cluster_info, + ) + .context(BuildStatefulsetSnafu)?, + KafkaRole::Controller => build_controller_rolegroup_statefulset( + kafka, + &kafka_role, + &resolved_product_image, + &rolegroup_ref, + rolegroup_config, + &kafka_security, + &merged_config, + &rbac_sa, + &client.kubernetes_cluster_info, + ) + .context(BuildStatefulsetSnafu)?, + }; + + if let AnyConfig::Broker(broker_config) = merged_config { + let rg_bootstrap_listener = build_broker_rolegroup_bootstrap_listener( + kafka, + &resolved_product_image, + &kafka_security, + &rolegroup_ref, + &broker_config, + ) + .context(BuildListenerSnafu)?; + bootstrap_listeners.push( + cluster_resources + .add(client, rg_bootstrap_listener) + .await + .context(ApplyRoleServiceSnafu)?, + ); + } + cluster_resources - .add(client, rg_bootstrap_listener) + .add(client, rg_service) .await - .context(ApplyRoleServiceSnafu)?, - ); - cluster_resources - .add(client, rg_service) - .await - .with_context(|_| ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup_ref.clone(), - })?; - cluster_resources - .add(client, rg_configmap) - .await - .with_context(|_| ApplyRoleGroupConfigSnafu { - rolegroup: rolegroup_ref.clone(), - })?; - - ss_cond_builder.add( + .with_context(|_| ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup_ref.clone(), + })?; cluster_resources - .add(client, rg_statefulset) + .add(client, rg_configmap) .await - .with_context(|_| ApplyRoleGroupStatefulSetSnafu { + .with_context(|_| ApplyRoleGroupConfigSnafu { rolegroup: rolegroup_ref.clone(), - })?, - ); - } + })?; + + ss_cond_builder.add( + cluster_resources + .add(client, rg_statefulset) + .await + .with_context(|_| ApplyRoleGroupStatefulSetSnafu { + rolegroup: rolegroup_ref.clone(), + })?, + ); + } - let role_config = kafka.role_config(&kafka_role); - if let Some(GenericRoleConfig { - pod_disruption_budget: pdb, - }) = role_config - { - add_pdbs(pdb, kafka, &kafka_role, client, &mut cluster_resources) - .await - .context(FailedToCreatePdbSnafu)?; + let role_config = kafka.role_config(&kafka_role); + if let Some(GenericRoleConfig { + pod_disruption_budget: pdb, + }) = role_config + { + add_pdbs(pdb, kafka, &kafka_role, client, &mut cluster_resources) + .await + .context(FailedToCreatePdbSnafu)?; + } } let discovery_cm = build_discovery_configmap( @@ -638,541 +474,6 @@ pub async fn reconcile_kafka( Ok(Action::await_change()) } -/// Kafka clients will use the load-balanced bootstrap listener to get a list of broker addresses and will use those to -/// transmit data to the correct broker. -// TODO (@NickLarsenNZ): Move shared functionality to stackable-operator -pub fn build_broker_rolegroup_bootstrap_listener( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - kafka_security: &KafkaTlsSecurity, - rolegroup: &RoleGroupRef, - merged_config: &KafkaConfig, -) -> Result { - Ok(listener::v1alpha1::Listener { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(kafka.bootstrap_service_name(rolegroup)) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? 
- .build(), - spec: listener::v1alpha1::ListenerSpec { - class_name: Some(merged_config.bootstrap_listener_class.clone()), - ports: Some(listener_ports(kafka_security)), - ..listener::v1alpha1::ListenerSpec::default() - }, - status: None, - }) -} - -/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator -fn build_broker_rolegroup_config_map( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - kafka_security: &KafkaTlsSecurity, - rolegroup: &RoleGroupRef, - broker_config: &HashMap>, - merged_config: &KafkaConfig, -) -> Result { - let mut server_cfg = broker_config - .get(&PropertyNameKind::File(SERVER_PROPERTIES_FILE.to_string())) - .cloned() - .unwrap_or_default(); - - server_cfg.extend(kafka_security.config_settings()); - server_cfg.extend(graceful_shutdown_config_properties()); - - let server_cfg = server_cfg - .into_iter() - .map(|(k, v)| (k, Some(v))) - .collect::>(); - - let jvm_sec_props: BTreeMap> = broker_config - .get(&PropertyNameKind::File( - JVM_SECURITY_PROPERTIES_FILE.to_string(), - )) - .cloned() - .unwrap_or_default() - .into_iter() - .map(|(k, v)| (k, Some(v))) - .collect(); - - let mut cm_builder = ConfigMapBuilder::new(); - cm_builder - .metadata( - ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(rolegroup.object_name()) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? - .build(), - ) - .add_data( - SERVER_PROPERTIES_FILE, - to_java_properties_string(server_cfg.iter().map(|(k, v)| (k, v))).with_context( - |_| SerializeZooCfgSnafu { - rolegroup: rolegroup.clone(), - }, - )?, - ) - .add_data( - JVM_SECURITY_PROPERTIES_FILE, - to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { - JvmSecurityPopertiesSnafu { - rolegroup: rolegroup.role_group.clone(), - } - })?, - ); - - tracing::debug!(?server_cfg, "Applied server config"); - tracing::debug!(?jvm_sec_props, "Applied JVM config"); - - extend_role_group_config_map(rolegroup, &merged_config.logging, &mut cm_builder).context( - InvalidLoggingConfigSnafu { - cm_name: rolegroup.object_name(), - }, - )?; - - cm_builder - .build() - .with_context(|_| BuildRoleGroupConfigSnafu { - rolegroup: rolegroup.clone(), - }) -} - -/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup -/// -/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -fn build_broker_rolegroup_service( - kafka: &v1alpha1::KafkaCluster, - resolved_product_image: &ResolvedProductImage, - rolegroup: &RoleGroupRef, -) -> Result { - Ok(Service { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(kafka) - .name(rolegroup.object_name()) - .ownerreference_from_resource(kafka, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? - .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) 
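The per-rolegroup Service removed here moves to the shared `resource::service::build_rolegroup_service` (its body is not shown in this excerpt). Judging from the removed code, the essential shape it presumably keeps is the headless Service below; a minimal sketch using the k8s-openapi types re-exported by stackable-operator:

use std::collections::BTreeMap;

use stackable_operator::k8s_openapi::api::core::v1::{Service, ServiceSpec};

fn headless_rolegroup_service(selector: BTreeMap<String, String>) -> Service {
    Service {
        spec: Some(ServiceSpec {
            // Headless: no cluster IP, peers resolve each other via DNS.
            cluster_ip: Some("None".to_string()),
            selector: Some(selector),
            // Also publish pods that are not ready yet, so that brokers can
            // find each other while the cluster is still forming.
            publish_not_ready_addresses: Some(true),
            ..ServiceSpec::default()
        }),
        ..Service::default()
    }
}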
- .build(), - spec: Some(ServiceSpec { - cluster_ip: Some("None".to_string()), - selector: Some( - Labels::role_group_selector( - kafka, - APP_NAME, - &rolegroup.role, - &rolegroup.role_group, - ) - .context(LabelBuildSnafu)? - .into(), - ), - publish_not_ready_addresses: Some(true), - ..ServiceSpec::default() - }), - status: None, - }) -} - -/// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. -/// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_broker_rolegroup_service`]). -#[allow(clippy::too_many_arguments)] -fn build_broker_rolegroup_statefulset( - kafka: &v1alpha1::KafkaCluster, - kafka_role: &KafkaRole, - resolved_product_image: &ResolvedProductImage, - rolegroup_ref: &RoleGroupRef, - broker_config: &HashMap>, - opa_connect_string: Option<&str>, - kafka_security: &KafkaTlsSecurity, - merged_config: &KafkaConfig, - service_account: &ServiceAccount, - cluster_info: &KubernetesClusterInfo, -) -> Result { - let role = kafka.role(kafka_role).context(InternalOperatorSnafu)?; - let rolegroup = kafka - .rolegroup(rolegroup_ref) - .context(InternalOperatorSnafu)?; - let recommended_object_labels = build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup_ref.role, - &rolegroup_ref.role_group, - ); - let recommended_labels = - Labels::recommended(recommended_object_labels.clone()).context(LabelBuildSnafu)?; - // Used for PVC templates that cannot be modified once they are deployed - let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( - kafka, - KAFKA_CONTROLLER_NAME, - // A version value is required, and we do want to use the "recommended" format for the other desired labels - "none", - &rolegroup_ref.role, - &rolegroup_ref.role_group, - )) - .context(LabelBuildSnafu)?; - - let kcat_prober_container_name = Container::KcatProber.to_string(); - let mut cb_kcat_prober = - ContainerBuilder::new(&kcat_prober_container_name).context(InvalidContainerNameSnafu { - name: kcat_prober_container_name.clone(), - })?; - - let kafka_container_name = Container::Kafka.to_string(); - let mut cb_kafka = - ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu { - name: kafka_container_name.clone(), - })?; - - let mut pod_builder = PodBuilder::new(); - - // Add TLS related volumes and volume mounts - let requested_secret_lifetime = merged_config - .requested_secret_lifetime - .context(MissingSecretLifetimeSnafu)?; - kafka_security - .add_volume_and_volume_mounts( - &mut pod_builder, - &mut cb_kcat_prober, - &mut cb_kafka, - &requested_secret_lifetime, - ) - .context(AddVolumesAndVolumeMountsSnafu)?; - - let mut pvcs = merged_config.resources.storage.build_pvcs(); - - // bootstrap listener should be persistent, - // main broker listener is an ephemeral PVC instead - pvcs.push( - ListenerOperatorVolumeSourceBuilder::new( - &ListenerReference::ListenerName(kafka.bootstrap_service_name(rolegroup_ref)), - &unversioned_recommended_labels, - ) - .build_pvc(LISTENER_BOOTSTRAP_VOLUME_NAME) - // FIXME (@Techassi): This should either be an expect (if it can never fail) or should be - // handled via a proper error handling - .unwrap(), - ); - - if kafka_security.has_kerberos_enabled() { - add_kerberos_pod_config( - kafka_security, - kafka_role, - &mut cb_kcat_prober, - &mut cb_kafka, - &mut pod_builder, - ) - .context(AddKerberosConfigSnafu)?; - } - - let mut env = broker_config 
- .get(&PropertyNameKind::Env) - .into_iter() - .flatten() - .map(|(k, v)| EnvVar { - name: k.clone(), - value: Some(v.clone()), - ..EnvVar::default() - }) - .collect::>(); - - env.push(EnvVar { - name: "ZOOKEEPER".to_string(), - value_from: Some(EnvVarSource { - config_map_key_ref: Some(ConfigMapKeySelector { - name: kafka.spec.cluster_config.zookeeper_config_map_name.clone(), - key: "ZOOKEEPER".to_string(), - ..ConfigMapKeySelector::default() - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }); - - env.push(EnvVar { - name: "POD_NAME".to_string(), - value_from: Some(EnvVarSource { - field_ref: Some(ObjectFieldSelector { - api_version: Some("v1".to_string()), - field_path: "metadata.name".to_string(), - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }); - - let kafka_listeners = get_kafka_listener_config( - kafka, - kafka_security, - &rolegroup_ref.object_name(), - cluster_info, - ) - .context(InvalidKafkaListenersSnafu)?; - - cb_kafka - .image_from_product_image(resolved_product_image) - .command(vec![ - "/bin/bash".to_string(), - "-x".to_string(), - "-euo".to_string(), - "pipefail".to_string(), - "-c".to_string(), - ]) - .args(vec![ - kafka_security - .kafka_container_commands( - &kafka_listeners, - opa_connect_string, - kafka_security.has_kerberos_enabled(), - ) - .join("\n"), - ]) - .add_env_var( - "EXTRA_ARGS", - construct_non_heap_jvm_args(merged_config, role, &rolegroup_ref.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ) - .add_env_var( - KAFKA_HEAP_OPTS, - construct_heap_jvm_args(merged_config, role, &rolegroup_ref.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ) - .add_env_var( - "KAFKA_LOG4J_OPTS", - format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}"), - ) - // Needed for the `containerdebug` process to log it's tracing information to. - .add_env_var( - "CONTAINERDEBUG_LOG_DIRECTORY", - format!("{STACKABLE_LOG_DIR}/containerdebug"), - ) - .add_env_vars(env) - .add_container_ports(container_ports(kafka_security)) - .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("config", STACKABLE_CONFIG_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount( - LISTENER_BOOTSTRAP_VOLUME_NAME, - STACKABLE_LISTENER_BOOTSTRAP_DIR, - ) - .context(AddVolumeMountSnafu)? - .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR) - .context(AddVolumeMountSnafu)? - .add_volume_mount("log", STACKABLE_LOG_DIR) - .context(AddVolumeMountSnafu)? 
- .resources(merged_config.resources.clone().into()); - - // Use kcat sidecar for probing container status rather than the official Kafka tools, since they incur a lot of - // unacceptable perf overhead - cb_kcat_prober - .image_from_product_image(resolved_product_image) - .command(vec!["sleep".to_string(), "infinity".to_string()]) - .add_env_vars(vec![EnvVar { - name: "POD_NAME".to_string(), - value_from: Some(EnvVarSource { - field_ref: Some(ObjectFieldSelector { - api_version: Some("v1".to_string()), - field_path: "metadata.name".to_string(), - }), - ..EnvVarSource::default() - }), - ..EnvVar::default() - }]) - .resources( - ResourceRequirementsBuilder::new() - .with_cpu_request("100m") - .with_cpu_limit("200m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - ) - .add_volume_mount( - LISTENER_BOOTSTRAP_VOLUME_NAME, - STACKABLE_LISTENER_BOOTSTRAP_DIR, - ) - .context(AddVolumeMountSnafu)? - .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR) - .context(AddVolumeMountSnafu)? - // Only allow the global load balancing service to send traffic to pods that are members of the quorum - // This also acts as a hint to the StatefulSet controller to wait for each pod to enter quorum before taking down the next - .readiness_probe(Probe { - exec: Some(ExecAction { - // If the broker is able to get its fellow cluster members then it has at least completed basic registration at some point - command: Some(kafka_security.kcat_prober_container_commands()), - }), - timeout_seconds: Some(5), - period_seconds: Some(2), - ..Probe::default() - }); - - if let Some(ContainerLogConfig { - choice: - Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { - custom: ConfigMapLogConfig { config_map }, - })), - }) = merged_config.logging.containers.get(&Container::Kafka) - { - pod_builder - .add_volume( - VolumeBuilder::new("log-config") - .with_config_map(config_map) - .build(), - ) - .context(AddVolumeSnafu)?; - } else { - pod_builder - .add_volume( - VolumeBuilder::new("log-config") - .with_config_map(rolegroup_ref.object_name()) - .build(), - ) - .context(AddVolumeSnafu)?; - } - - let metadata = ObjectMetaBuilder::new() - .with_recommended_labels(recommended_object_labels) - .context(MetadataBuildSnafu)? - .build(); - - pod_builder - .metadata(metadata) - .image_pull_secrets_from_product_image(resolved_product_image) - .add_container(cb_kafka.build()) - .add_container(cb_kcat_prober.build()) - .affinity(&merged_config.affinity) - .add_volume(Volume { - name: "config".to_string(), - config_map: Some(ConfigMapVolumeSource { - name: rolegroup_ref.object_name(), - ..ConfigMapVolumeSource::default() - }), - ..Volume::default() - }) - .context(AddVolumeSnafu)? - // bootstrap volume is a persistent volume template instead, to keep addresses persistent - .add_listener_volume_by_listener_class( - LISTENER_BROKER_VOLUME_NAME, - &merged_config.broker_listener_class, - &recommended_labels, - ) - .context(AddListenerVolumeSnafu)? - .add_empty_dir_volume( - "log", - Some(product_logging::framework::calculate_log_volume_size_limit( - &[MAX_KAFKA_LOG_FILES_SIZE], - )), - ) - .context(AddVolumeSnafu)? 
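A few lines up, the removed code chose the ConfigMap backing the "log-config" volume: a user-supplied map for custom log configs, otherwise the rolegroup's own ConfigMap. The same decision, extracted as a small helper (a sketch; the spec types come from `stackable_operator::product_logging::spec`, exactly as in the removed block):

use stackable_operator::product_logging::spec::{
    ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, CustomContainerLogConfig,
};

/// Name of the ConfigMap that should back the "log-config" volume.
fn log_config_map_name(log_config: Option<&ContainerLogConfig>, rolegroup_cm: &str) -> String {
    match log_config {
        Some(ContainerLogConfig {
            choice:
                Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig {
                    custom: ConfigMapLogConfig { config_map },
                })),
        }) => config_map.clone(),
        // Automatic or unset logging falls back to the rolegroup ConfigMap.
        _ => rolegroup_cm.to_string(),
    }
}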
-        .service_account_name(service_account.name_any())
-        .security_context(PodSecurityContextBuilder::new().fs_group(1000).build());
-
-    // Add vector container after kafka container to keep the defaulting into kafka container
-    if merged_config.logging.enable_vector_agent {
-        match &kafka.spec.cluster_config.vector_aggregator_config_map_name {
-            Some(vector_aggregator_config_map_name) => {
-                pod_builder.add_container(
-                    product_logging::framework::vector_container(
-                        resolved_product_image,
-                        "config",
-                        "log",
-                        merged_config.logging.containers.get(&Container::Vector),
-                        ResourceRequirementsBuilder::new()
-                            .with_cpu_request("250m")
-                            .with_cpu_limit("500m")
-                            .with_memory_request("128Mi")
-                            .with_memory_limit("128Mi")
-                            .build(),
-                        vector_aggregator_config_map_name,
-                    )
-                    .context(ConfigureLoggingSnafu)?,
-                );
-            }
-            None => {
-                VectorAggregatorConfigMapMissingSnafu.fail()?;
-            }
-        }
-    }
-
-    add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?;
-
-    let mut pod_template = pod_builder.build_template();
-
-    let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default);
-    // Don't run kcat pod as PID 1, to ensure that default signal handlers apply
-    pod_template_spec.share_process_namespace = Some(true);
-
-    pod_template.merge_from(role.config.pod_overrides.clone());
-    pod_template.merge_from(rolegroup.config.pod_overrides.clone());
-
-    Ok(StatefulSet {
-        metadata: ObjectMetaBuilder::new()
-            .name_and_namespace(kafka)
-            .name(rolegroup_ref.object_name())
-            .ownerreference_from_resource(kafka, None, Some(true))
-            .context(ObjectMissingMetadataForOwnerRefSnafu)?
-            .with_recommended_labels(build_recommended_labels(
-                kafka,
-                KAFKA_CONTROLLER_NAME,
-                &resolved_product_image.app_version_label_value,
-                &rolegroup_ref.role,
-                &rolegroup_ref.role_group,
-            ))
-            .context(MetadataBuildSnafu)?
-            .build(),
-        spec: Some(StatefulSetSpec {
-            pod_management_policy: Some("Parallel".to_string()),
-            replicas: rolegroup.replicas.map(i32::from),
-            selector: LabelSelector {
-                match_labels: Some(
-                    Labels::role_group_selector(
-                        kafka,
-                        APP_NAME,
-                        &rolegroup_ref.role,
-                        &rolegroup_ref.role_group,
-                    )
-                    .context(LabelBuildSnafu)?
-                    .into(),
-                ),
-                ..LabelSelector::default()
-            },
-            service_name: Some(rolegroup_ref.object_name()),
-            template: pod_template,
-            volume_claim_templates: Some(pvcs),
-            ..StatefulSetSpec::default()
-        }),
-        status: None,
-    })
-}
-
 pub fn error_policy(
     _obj: Arc<DeserializeGuard<v1alpha1::KafkaCluster>>,
     error: &Error,
@@ -1184,53 +485,69 @@ pub fn error_policy(
     }
 }
 
-/// We only expose client HTTP / HTTPS and Metrics ports.
-fn listener_ports(kafka_security: &KafkaTlsSecurity) -> Vec<listener::v1alpha1::ListenerPort> {
-    let mut ports = vec![
-        listener::v1alpha1::ListenerPort {
-            name: METRICS_PORT_NAME.to_string(),
-            port: METRICS_PORT.into(),
-            protocol: Some("TCP".to_string()),
-        },
-        listener::v1alpha1::ListenerPort {
-            name: kafka_security.client_port_name().to_string(),
-            port: kafka_security.client_port().into(),
-            protocol: Some("TCP".to_string()),
-        },
-    ];
-    if kafka_security.has_kerberos_enabled() {
-        ports.push(listener::v1alpha1::ListenerPort {
-            name: kafka_security.bootstrap_port_name().to_string(),
-            port: kafka_security.bootstrap_port().into(),
-            protocol: Some("TCP".to_string()),
-        });
-    }
-    ports
-}
+/// Defines all required roles and their required configuration.
+///
+/// The roles and their configs are then validated and complemented by the product config.
+///
+/// # Arguments
+/// * `kafka` - The KafkaCluster containing the role definitions.
+/// * `product_version` - The Kafka product version.
+/// * `product_config` - The product config to validate and complement the user config.
+///
+fn validated_product_config(
+    kafka: &v1alpha1::KafkaCluster,
+    product_version: &str,
+    product_config: &ProductConfigManager,
+) -> Result<ValidatedRoleConfigByPropertyKind> {
+    let mut roles = HashMap::new();
+
+    roles.insert(
+        KafkaRole::Broker.to_string(),
+        (
+            vec![
+                PropertyNameKind::File(BROKER_PROPERTIES_FILE.to_string()),
+                PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()),
+                PropertyNameKind::Env,
+            ],
+            kafka
+                .broker_role()
+                .cloned()
+                .context(MissingKafkaRoleSnafu {
+                    role: KafkaRole::Broker,
+                })?
+                .erase(),
+        ),
+    );
 
-/// We only expose client HTTP / HTTPS and Metrics ports.
-fn container_ports(kafka_security: &KafkaTlsSecurity) -> Vec<ContainerPort> {
-    let mut ports = vec![
-        ContainerPort {
-            name: Some(METRICS_PORT_NAME.to_string()),
-            container_port: METRICS_PORT.into(),
-            protocol: Some("TCP".to_string()),
-            ..ContainerPort::default()
-        },
-        ContainerPort {
-            name: Some(kafka_security.client_port_name().to_string()),
-            container_port: kafka_security.client_port().into(),
-            protocol: Some("TCP".to_string()),
-            ..ContainerPort::default()
-        },
-    ];
-    if kafka_security.has_kerberos_enabled() {
-        ports.push(ContainerPort {
-            name: Some(kafka_security.bootstrap_port_name().to_string()),
-            container_port: kafka_security.bootstrap_port().into(),
-            protocol: Some("TCP".to_string()),
-            ..ContainerPort::default()
-        });
+    if kafka.is_controller_configured() {
+        roles.insert(
+            KafkaRole::Controller.to_string(),
+            (
+                vec![
+                    PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()),
+                    PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()),
+                    PropertyNameKind::Env,
+                ],
+                kafka
+                    .controller_role()
+                    .cloned()
+                    .context(MissingKafkaRoleSnafu {
+                        role: KafkaRole::Controller,
+                    })?
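For orientation, `ValidatedRoleConfigByPropertyKind` (returned by this function) appears to nest role name, then rolegroup name, then property kind, judging by how the reconcile loop above consumes it. A toy value of that shape, with hypothetical names and values:

use std::collections::{BTreeMap, HashMap};

use product_config::types::PropertyNameKind;

type Validated =
    HashMap<String, HashMap<String, HashMap<PropertyNameKind, BTreeMap<String, String>>>>;

fn toy_validated_config() -> Validated {
    let mut file_props = BTreeMap::new();
    // Hypothetical entry for illustration only.
    file_props.insert("process.roles".to_string(), "controller".to_string());

    let mut by_kind = HashMap::new();
    by_kind.insert(
        PropertyNameKind::File("controller.properties".to_string()),
        file_props,
    );

    let mut by_group = HashMap::new();
    by_group.insert("default".to_string(), by_kind);

    let mut by_role = HashMap::new();
    by_role.insert("controller".to_string(), by_group);
    by_role
}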
+ .erase(), + ), + ); } - ports + + let role_config = + transform_all_roles_to_config(kafka, roles).context(GenerateProductConfigSnafu)?; + + validate_all_roles_and_groups_config( + product_version, + &role_config, + product_config, + false, + false, + ) + .context(InvalidProductConfigSnafu) } diff --git a/rust/operator-binary/src/kerberos.rs b/rust/operator-binary/src/kerberos.rs index e96ceea9..e22de94a 100644 --- a/rust/operator-binary/src/kerberos.rs +++ b/rust/operator-binary/src/kerberos.rs @@ -12,8 +12,8 @@ use stackable_operator::builder::{ }; use crate::crd::{ - KafkaRole, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_DIR, - STACKABLE_KERBEROS_KRB5_PATH, security::KafkaTlsSecurity, + LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, STACKABLE_KERBEROS_DIR, + STACKABLE_KERBEROS_KRB5_PATH, role::KafkaRole, security::KafkaTlsSecurity, }; #[derive(Snafu, Debug)] diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 2d36a64f..2404e690 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -45,6 +45,7 @@ mod kafka_controller; mod kerberos; mod operations; mod product_logging; +mod resource; mod utils; mod built_info { @@ -207,7 +208,7 @@ fn references_config_map( return false; }; - kafka.spec.cluster_config.zookeeper_config_map_name == config_map.name_any() + kafka.spec.cluster_config.zookeeper_config_map_name == Some(config_map.name_any()) || match &kafka.spec.cluster_config.authorization.opa { Some(opa_config) => opa_config.config_map_name == config_map.name_any(), None => false, diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index a3cc3f02..b03187f4 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use snafu::{ResultExt, Snafu}; use stackable_operator::builder::pod::PodBuilder; -use crate::crd::KafkaConfig; +use crate::crd::role::AnyConfig; #[derive(Debug, Snafu)] pub enum Error { @@ -20,14 +20,14 @@ pub fn graceful_shutdown_config_properties() -> BTreeMap { } pub fn add_graceful_shutdown_config( - merged_config: &KafkaConfig, + merged_config: &AnyConfig, pod_builder: &mut PodBuilder, ) -> Result<(), Error> { // This must be always set by the merge mechanism, as we provide a default value, // users can not disable graceful shutdown. 
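The switch to borrowing in the hunk below (`&merged_config.graceful_shutdown_timeout`) is worth a note: `AnyConfig` is an enum, so plain field access on it only works if it derefs to a struct holding the fields shared by both roles, and moving the `Option<Duration>` out through that deref is not allowed while borrowing is. A sketch of the pattern, with assumed struct names and both variants collapsed onto one payload type for brevity:

use std::ops::Deref;

use stackable_operator::shared::time::Duration;

// Hypothetical shared-fields struct; the real role configs live in crd/role.
struct CommonConfig {
    graceful_shutdown_timeout: Option<Duration>,
}

enum AnyConfig {
    Broker(CommonConfig),
    Controller(CommonConfig),
}

impl Deref for AnyConfig {
    type Target = CommonConfig;

    fn deref(&self) -> &Self::Target {
        match self {
            AnyConfig::Broker(c) | AnyConfig::Controller(c) => c,
        }
    }
}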
- if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout { + if let Some(graceful_shutdown_timeout) = &merged_config.graceful_shutdown_timeout { pod_builder - .termination_grace_period(&graceful_shutdown_timeout) + .termination_grace_period(graceful_shutdown_timeout) .context(SetTerminationGracePeriodSnafu)?; } diff --git a/rust/operator-binary/src/operations/pdb.rs b/rust/operator-binary/src/operations/pdb.rs index 31c760bf..d6211fa0 100644 --- a/rust/operator-binary/src/operations/pdb.rs +++ b/rust/operator-binary/src/operations/pdb.rs @@ -5,7 +5,7 @@ use stackable_operator::{ }; use crate::{ - crd::{APP_NAME, KafkaRole, OPERATOR_NAME, v1alpha1}, + crd::{APP_NAME, OPERATOR_NAME, role::KafkaRole, v1alpha1}, kafka_controller::KAFKA_CONTROLLER_NAME, }; @@ -35,6 +35,7 @@ pub async fn add_pdbs( } let max_unavailable = pdb.max_unavailable.unwrap_or(match role { KafkaRole::Broker => max_unavailable_brokers(), + KafkaRole::Controller => max_unavailable_controllers(), }); let pdb = PodDisruptionBudgetBuilder::new_with_role( kafka, @@ -61,3 +62,8 @@ fn max_unavailable_brokers() -> u16 { // We can not make any assumptions about topic replication factors. 1 } + +fn max_unavailable_controllers() -> u16 { + // TODO: what do we want here? + 1 +} diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 2ba77898..b7990be6 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -1,92 +1,151 @@ -use snafu::Snafu; +use std::{borrow::Cow, fmt::Display}; + use stackable_operator::{ builder::configmap::ConfigMapBuilder, memory::{BinaryMultiple, MemoryQuantity}, product_logging::{ self, - spec::{ContainerLogConfig, ContainerLogConfigChoice, Logging}, + spec::{ContainerLogConfig, ContainerLogConfigChoice}, }, role_utils::RoleGroupRef, }; -use crate::crd::{Container, STACKABLE_LOG_DIR, v1alpha1}; - -#[derive(Snafu, Debug)] -pub enum Error { - #[snafu(display("object has no namespace"))] - ObjectHasNoNamespace, - - #[snafu(display("failed to retrieve the ConfigMap {cm_name}"))] - ConfigMapNotFound { - source: stackable_operator::client::Error, - cm_name: String, - }, - - #[snafu(display("failed to retrieve the entry {entry} for ConfigMap {cm_name}"))] - MissingConfigMapEntry { - entry: &'static str, - cm_name: String, - }, - - #[snafu(display("crd validation failure"))] - CrdValidationFailure { source: crate::crd::Error }, -} - -type Result = std::result::Result; +use crate::crd::{ + role::{AnyConfig, broker::BrokerContainer, controller::ControllerContainer}, + v1alpha1, +}; +pub const STACKABLE_LOG_CONFIG_DIR: &str = "/stackable/log_config"; +pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; +// log4j pub const LOG4J_CONFIG_FILE: &str = "log4j.properties"; -pub const KAFKA_LOG_FILE: &str = "kafka.log4j.xml"; - +pub const KAFKA_LOG4J_FILE: &str = "kafka.log4j.xml"; +// log4j2 +pub const LOG4J2_CONFIG_FILE: &str = "log4j2.properties"; +pub const KAFKA_LOG4J2_FILE: &str = "kafka.log4j2.xml"; +// max size pub const MAX_KAFKA_LOG_FILES_SIZE: MemoryQuantity = MemoryQuantity { value: 10.0, unit: BinaryMultiple::Mebi, }; -const CONSOLE_CONVERSION_PATTERN: &str = "[%d] %p %m (%c)%n"; +const CONSOLE_CONVERSION_PATTERN_LOG4J: &str = "[%d] %p %m (%c)%n"; +const CONSOLE_CONVERSION_PATTERN_LOG4J2: &str = "%d{ISO8601} %p [%t] %c - %m%n"; + +pub fn kafka_log_opts(product_version: &str) -> String { + if product_version.starts_with("3.") { + 
format!("-Dlog4j.configuration=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J_CONFIG_FILE}") + } else { + format!("-Dlog4j2.configurationFile=file:{STACKABLE_LOG_CONFIG_DIR}/{LOG4J2_CONFIG_FILE}") + } +} + +pub fn kafka_log_opts_env_var() -> String { + "KAFKA_LOG4J_OPTS".to_string() +} /// Extend the role group ConfigMap with logging and Vector configurations pub fn extend_role_group_config_map( + product_version: &str, rolegroup: &RoleGroupRef, - logging: &Logging, + merged_config: &AnyConfig, + cm_builder: &mut ConfigMapBuilder, +) { + let container_name = match merged_config { + AnyConfig::Broker(_) => BrokerContainer::Kafka.to_string(), + AnyConfig::Controller(_) => ControllerContainer::Kafka.to_string(), + }; + + // Starting with Kafka 4.0, log4j2 is used instead of log4j. + match product_version.starts_with("3.") { + true => add_log4j_config_if_automatic( + cm_builder, + Some(merged_config.kafka_logging()), + LOG4J_CONFIG_FILE, + container_name, + KAFKA_LOG4J_FILE, + MAX_KAFKA_LOG_FILES_SIZE, + ), + false => add_log4j2_config_if_automatic( + cm_builder, + Some(merged_config.kafka_logging()), + LOG4J2_CONFIG_FILE, + container_name, + KAFKA_LOG4J2_FILE, + MAX_KAFKA_LOG_FILES_SIZE, + ), + } + + let vector_log_config = merged_config.vector_logging(); + let vector_log_config = if let ContainerLogConfig { + choice: Some(ContainerLogConfigChoice::Automatic(log_config)), + } = &*vector_log_config + { + Some(log_config) + } else { + None + }; + + if merged_config.vector_logging_enabled() { + cm_builder.add_data( + product_logging::framework::VECTOR_CONFIG_FILE, + product_logging::framework::create_vector_config(rolegroup, vector_log_config), + ); + } +} + +fn add_log4j_config_if_automatic( cm_builder: &mut ConfigMapBuilder, -) -> Result<()> { + log_config: Option>, + log_config_file: &str, + container_name: impl Display, + log_file: &str, + max_log_file_size: MemoryQuantity, +) { if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&Container::Kafka) + }) = log_config.as_deref() { cm_builder.add_data( - LOG4J_CONFIG_FILE, + log_config_file, product_logging::framework::create_log4j_config( - &format!( - "{STACKABLE_LOG_DIR}/{container}", - container = Container::Kafka - ), - KAFKA_LOG_FILE, - MAX_KAFKA_LOG_FILES_SIZE + &format!("{STACKABLE_LOG_DIR}/{container_name}"), + log_file, + max_log_file_size .scale_to(BinaryMultiple::Mebi) .floor() .value as u32, - CONSOLE_CONVERSION_PATTERN, + CONSOLE_CONVERSION_PATTERN_LOG4J, log_config, ), ); } +} - let vector_log_config = if let Some(ContainerLogConfig { +fn add_log4j2_config_if_automatic( + cm_builder: &mut ConfigMapBuilder, + log_config: Option>, + log_config_file: &str, + container_name: impl Display, + log_file: &str, + max_log_file_size: MemoryQuantity, +) { + if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = logging.containers.get(&Container::Vector) + }) = log_config.as_deref() { - Some(log_config) - } else { - None - }; - - if logging.enable_vector_agent { cm_builder.add_data( - product_logging::framework::VECTOR_CONFIG_FILE, - product_logging::framework::create_vector_config(rolegroup, vector_log_config), + log_config_file, + product_logging::framework::create_log4j2_config( + &format!("{STACKABLE_LOG_DIR}/{container_name}",), + log_file, + max_log_file_size + .scale_to(BinaryMultiple::Mebi) + .floor() + .value as u32, + CONSOLE_CONVERSION_PATTERN_LOG4J2, + log_config, + ), ); } - - Ok(()) } diff --git 
a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs
new file mode 100644
index 00000000..54a06921
--- /dev/null
+++ b/rust/operator-binary/src/resource/configmap.rs
@@ -0,0 +1,144 @@
+use std::collections::{BTreeMap, HashMap};
+
+use product_config::{types::PropertyNameKind, writer::to_java_properties_string};
+use snafu::{ResultExt, Snafu};
+use stackable_operator::{
+    builder::{configmap::ConfigMapBuilder, meta::ObjectMetaBuilder},
+    commons::product_image_selection::ResolvedProductImage,
+    k8s_openapi::api::core::v1::ConfigMap,
+    role_utils::RoleGroupRef,
+};
+
+use crate::{
+    crd::{JVM_SECURITY_PROPERTIES_FILE, role::AnyConfig, security::KafkaTlsSecurity, v1alpha1},
+    kafka_controller::KAFKA_CONTROLLER_NAME,
+    operations::graceful_shutdown::graceful_shutdown_config_properties,
+    product_logging::extend_role_group_config_map,
+    utils::build_recommended_labels,
+};
+
+#[derive(Snafu, Debug)]
+pub enum Error {
+    #[snafu(display("failed to build ConfigMap for {}", rolegroup))]
+    BuildRoleGroupConfig {
+        source: stackable_operator::builder::configmap::Error,
+        rolegroup: RoleGroupRef<v1alpha1::KafkaCluster>,
+    },
+
+    #[snafu(display(
+        "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}",
+        rolegroup
+    ))]
+    JvmSecurityProperties {
+        source: product_config::writer::PropertiesWriterError,
+        rolegroup: String,
+    },
+
+    #[snafu(display("failed to build Metadata"))]
+    MetadataBuild {
+        source: stackable_operator::builder::meta::Error,
+    },
+
+    #[snafu(display("object is missing metadata to build owner reference"))]
+    ObjectMissingMetadataForOwnerRef {
+        source: stackable_operator::builder::meta::Error,
+    },
+
+    #[snafu(display("failed to serialize config for {rolegroup}"))]
+    SerializeConfig {
+        source: product_config::writer::PropertiesWriterError,
+        rolegroup: RoleGroupRef<v1alpha1::KafkaCluster>,
+    },
+}
+
+/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator
+pub fn build_rolegroup_config_map(
+    kafka: &v1alpha1::KafkaCluster,
+    resolved_product_image: &ResolvedProductImage,
+    kafka_security: &KafkaTlsSecurity,
+    rolegroup: &RoleGroupRef<v1alpha1::KafkaCluster>,
+    rolegroup_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
+    merged_config: &AnyConfig,
+) -> Result<ConfigMap, Error> {
+    let kafka_config_file_name = merged_config.config_file_name();
+
+    let mut kafka_config = rolegroup_config
+        .get(&PropertyNameKind::File(kafka_config_file_name.to_string()))
+        .cloned()
+        .unwrap_or_default();
+
+    match merged_config {
+        AnyConfig::Broker(_) => kafka_config.extend(kafka_security.broker_config_settings()),
+        AnyConfig::Controller(_) => {
+            kafka_config.extend(kafka_security.controller_config_settings())
+        }
+    }
+
+    kafka_config.extend(graceful_shutdown_config_properties());
+
+    let kafka_config = kafka_config
+        .into_iter()
+        .map(|(k, v)| (k, Some(v)))
+        .collect::<BTreeMap<_, _>>();
+
+    let jvm_sec_props: BTreeMap<String, Option<String>> = rolegroup_config
+        .get(&PropertyNameKind::File(
+            JVM_SECURITY_PROPERTIES_FILE.to_string(),
+        ))
+        .cloned()
+        .unwrap_or_default()
+        .into_iter()
+        .map(|(k, v)| (k, Some(v)))
+        .collect();
+
+    let mut cm_builder = ConfigMapBuilder::new();
+    cm_builder
+        .metadata(
+            ObjectMetaBuilder::new()
+                .name_and_namespace(kafka)
+                .name(rolegroup.object_name())
+                .ownerreference_from_resource(kafka, None, Some(true))
+                .context(ObjectMissingMetadataForOwnerRefSnafu)?
+                .with_recommended_labels(build_recommended_labels(
+                    kafka,
+                    KAFKA_CONTROLLER_NAME,
+                    &resolved_product_image.app_version_label_value,
+                    &rolegroup.role,
+                    &rolegroup.role_group,
+                ))
+                .context(MetadataBuildSnafu)?
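+                // metadata finished; the add_data calls below write the Kafka properties
+                // file and the JVM security properties into the ConfigMap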
+                .build(),
+        )
+        .add_data(
+            kafka_config_file_name,
+            to_java_properties_string(kafka_config.iter().map(|(k, v)| (k, v))).with_context(
+                |_| SerializeConfigSnafu {
+                    rolegroup: rolegroup.clone(),
+                },
+            )?,
+        )
+        .add_data(
+            JVM_SECURITY_PROPERTIES_FILE,
+            to_java_properties_string(jvm_sec_props.iter()).with_context(|_| {
+                JvmSecurityPropertiesSnafu {
+                    rolegroup: rolegroup.role_group.clone(),
+                }
+            })?,
+        );
+
+    tracing::debug!(?kafka_config, "Applied kafka config");
+    tracing::debug!(?jvm_sec_props, "Applied JVM config");
+
+    extend_role_group_config_map(
+        &resolved_product_image.product_version,
+        rolegroup,
+        merged_config,
+        &mut cm_builder,
+    );
+
+    cm_builder
+        .build()
+        .with_context(|_| BuildRoleGroupConfigSnafu {
+            rolegroup: rolegroup.clone(),
+        })
+}
diff --git a/rust/operator-binary/src/resource/listener.rs b/rust/operator-binary/src/resource/listener.rs
new file mode 100644
index 00000000..23cc254f
--- /dev/null
+++ b/rust/operator-binary/src/resource/listener.rs
@@ -0,0 +1,85 @@
+use snafu::{ResultExt, Snafu};
+use stackable_operator::{
+    builder::meta::ObjectMetaBuilder, commons::product_image_selection::ResolvedProductImage,
+    crd::listener, role_utils::RoleGroupRef,
+};
+
+use crate::{
+    crd::{
+        METRICS_PORT, METRICS_PORT_NAME, role::broker::BrokerConfig, security::KafkaTlsSecurity,
+        v1alpha1,
+    },
+    kafka_controller::KAFKA_CONTROLLER_NAME,
+    utils::build_recommended_labels,
+};
+
+#[derive(Snafu, Debug)]
+pub enum Error {
+    #[snafu(display("failed to build Metadata"))]
+    MetadataBuild {
+        source: stackable_operator::builder::meta::Error,
+    },
+
+    #[snafu(display("object is missing metadata to build owner reference"))]
+    ObjectMissingMetadataForOwnerRef {
+        source: stackable_operator::builder::meta::Error,
+    },
+}
+
+/// Kafka clients will use the load-balanced bootstrap listener to get a list of broker addresses and will use those to
+/// transmit data to the correct broker.
+// TODO (@NickLarsenNZ): Move shared functionality to stackable-operator
+pub fn build_broker_rolegroup_bootstrap_listener(
+    kafka: &v1alpha1::KafkaCluster,
+    resolved_product_image: &ResolvedProductImage,
+    kafka_security: &KafkaTlsSecurity,
+    rolegroup: &RoleGroupRef<v1alpha1::KafkaCluster>,
+    merged_config: &BrokerConfig,
+) -> Result<listener::v1alpha1::Listener, Error> {
+    Ok(listener::v1alpha1::Listener {
+        metadata: ObjectMetaBuilder::new()
+            .name_and_namespace(kafka)
+            .name(kafka.bootstrap_service_name(rolegroup))
+            .ownerreference_from_resource(kafka, None, Some(true))
+            .context(ObjectMissingMetadataForOwnerRefSnafu)?
+            .with_recommended_labels(build_recommended_labels(
+                kafka,
+                KAFKA_CONTROLLER_NAME,
+                &resolved_product_image.app_version_label_value,
+                &rolegroup.role,
+                &rolegroup.role_group,
+            ))
+            .context(MetadataBuildSnafu)?
+            .build(),
+        spec: listener::v1alpha1::ListenerSpec {
+            class_name: Some(merged_config.bootstrap_listener_class.clone()),
+            ports: Some(listener_ports(kafka_security)),
+            ..listener::v1alpha1::ListenerSpec::default()
+        },
+        status: None,
+    })
+}
+
+/// We only expose client HTTP / HTTPS and Metrics ports.
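+/// With Kerberos enabled, the bootstrap port is exposed in addition.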
+fn listener_ports(kafka_security: &KafkaTlsSecurity) -> Vec<listener::v1alpha1::ListenerPort> {
+    let mut ports = vec![
+        listener::v1alpha1::ListenerPort {
+            name: METRICS_PORT_NAME.to_string(),
+            port: METRICS_PORT.into(),
+            protocol: Some("TCP".to_string()),
+        },
+        listener::v1alpha1::ListenerPort {
+            name: kafka_security.client_port_name().to_string(),
+            port: kafka_security.client_port().into(),
+            protocol: Some("TCP".to_string()),
+        },
+    ];
+    if kafka_security.has_kerberos_enabled() {
+        ports.push(listener::v1alpha1::ListenerPort {
+            name: kafka_security.bootstrap_port_name().to_string(),
+            port: kafka_security.bootstrap_port().into(),
+            protocol: Some("TCP".to_string()),
+        });
+    }
+    ports
+}
diff --git a/rust/operator-binary/src/resource/mod.rs b/rust/operator-binary/src/resource/mod.rs
new file mode 100644
index 00000000..a79483f8
--- /dev/null
+++ b/rust/operator-binary/src/resource/mod.rs
@@ -0,0 +1,4 @@
+pub mod configmap;
+pub mod listener;
+pub mod service;
+pub mod statefulset;
diff --git a/rust/operator-binary/src/resource/service.rs b/rust/operator-binary/src/resource/service.rs
new file mode 100644
index 00000000..d9c7c7c9
--- /dev/null
+++ b/rust/operator-binary/src/resource/service.rs
@@ -0,0 +1,75 @@
+use snafu::{ResultExt, Snafu};
+use stackable_operator::{
+    builder::meta::ObjectMetaBuilder,
+    commons::product_image_selection::ResolvedProductImage,
+    k8s_openapi::api::core::v1::{Service, ServiceSpec},
+    kvp::{Label, Labels},
+    role_utils::RoleGroupRef,
+};
+
+use crate::{
+    crd::{APP_NAME, v1alpha1},
+    kafka_controller::KAFKA_CONTROLLER_NAME,
+    utils::build_recommended_labels,
+};
+
+#[derive(Snafu, Debug)]
+pub enum Error {
+    #[snafu(display("failed to build Metadata"))]
+    MetadataBuild {
+        source: stackable_operator::builder::meta::Error,
+    },
+
+    #[snafu(display("failed to build Labels"))]
+    LabelBuild {
+        source: stackable_operator::kvp::LabelError,
+    },
+
+    #[snafu(display("object is missing metadata to build owner reference"))]
+    ObjectMissingMetadataForOwnerRef {
+        source: stackable_operator::builder::meta::Error,
+    },
+}
+
+/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup
+///
+/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing.
+pub fn build_rolegroup_service(
+    kafka: &v1alpha1::KafkaCluster,
+    resolved_product_image: &ResolvedProductImage,
+    rolegroup: &RoleGroupRef<v1alpha1::KafkaCluster>,
+) -> Result<Service, Error> {
+    Ok(Service {
+        metadata: ObjectMetaBuilder::new()
+            .name_and_namespace(kafka)
+            .name(rolegroup.object_name())
+            .ownerreference_from_resource(kafka, None, Some(true))
+            .context(ObjectMissingMetadataForOwnerRefSnafu)?
+            .with_recommended_labels(build_recommended_labels(
+                kafka,
+                KAFKA_CONTROLLER_NAME,
+                &resolved_product_image.app_version_label_value,
+                &rolegroup.role,
+                &rolegroup.role_group,
+            ))
+            .context(MetadataBuildSnafu)?
+            .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?)
+            .build(),
+        spec: Some(ServiceSpec {
+            cluster_ip: Some("None".to_string()),
+            selector: Some(
+                Labels::role_group_selector(
+                    kafka,
+                    APP_NAME,
+                    &rolegroup.role,
+                    &rolegroup.role_group,
+                )
+                .context(LabelBuildSnafu)?
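+                    // flatten the typed Labels into the plain string map expected by
+                    // ServiceSpec::selector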
+ .into(), + ), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }), + status: None, + }) +} diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs new file mode 100644 index 00000000..18bcc04c --- /dev/null +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -0,0 +1,912 @@ +use std::{ + collections::{BTreeMap, HashMap}, + ops::Deref, +}; + +use product_config::types::PropertyNameKind; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + builder::{ + meta::ObjectMetaBuilder, + pod::{ + PodBuilder, + container::ContainerBuilder, + resources::ResourceRequirementsBuilder, + security::PodSecurityContextBuilder, + volume::{ListenerOperatorVolumeSourceBuilder, ListenerReference, VolumeBuilder}, + }, + }, + commons::product_image_selection::ResolvedProductImage, + k8s_openapi::{ + DeepMerge, + api::{ + apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetUpdateStrategy}, + core::v1::{ + ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, + ExecAction, Lifecycle, LifecycleHandler, ObjectFieldSelector, PodSpec, Probe, + ServiceAccount, SleepAction, TCPSocketAction, Volume, + }, + }, + apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, + }, + kube::ResourceExt, + kvp::Labels, + product_logging::{ + self, + spec::{ + ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, + CustomContainerLogConfig, + }, + }, + role_utils::RoleGroupRef, + utils::cluster_info::KubernetesClusterInfo, +}; + +use crate::{ + config::{ + command::{broker_kafka_container_commands, controller_kafka_container_command}, + node_id_hasher::node_id_hash32_offset, + }, + crd::{ + self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, + LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, + STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, + STACKABLE_LISTENER_BROKER_DIR, + listener::get_kafka_listener_config, + role::{ + AnyConfig, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BrokerContainer, + controller::ControllerContainer, + }, + security::KafkaTlsSecurity, + v1alpha1, + }, + kafka_controller::KAFKA_CONTROLLER_NAME, + kerberos::add_kerberos_pod_config, + operations::graceful_shutdown::add_graceful_shutdown_config, + product_logging::{ + MAX_KAFKA_LOG_FILES_SIZE, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, kafka_log_opts, + kafka_log_opts_env_var, + }, + utils::build_recommended_labels, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to add kerberos config"))] + AddKerberosConfig { source: crate::kerberos::Error }, + + #[snafu(display("failed to add listener volume"))] + AddListenerVolume { + source: stackable_operator::builder::pod::Error, + }, + + #[snafu(display("failed to add Secret Volumes and VolumeMounts"))] + AddVolumesAndVolumeMounts { source: crate::crd::security::Error }, + + #[snafu(display("failed to add needed volumeMount"))] + AddVolumeMount { + source: stackable_operator::builder::pod::container::Error, + }, + + #[snafu(display("failed to add needed volume"))] + AddVolume { + source: stackable_operator::builder::pod::Error, + }, + + #[snafu(display("failed to build bootstrap listener pvc"))] + BuildBootstrapListenerPvc { + source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, + }, + + #[snafu(display("failed to build pod descriptors"))] + BuildPodDescriptors { source: crate::crd::Error }, + + #[snafu(display("failed 
to configure logging"))] + ConfigureLogging { + source: stackable_operator::product_logging::framework::LoggingError, + }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::crd::role::Error }, + + #[snafu(display("failed to configure graceful shutdown"))] + GracefulShutdown { + source: crate::operations::graceful_shutdown::Error, + }, + + #[snafu(display("invalid Container name [{name}]"))] + InvalidContainerName { + name: String, + source: stackable_operator::builder::pod::container::Error, + }, + + #[snafu(display("invalid kafka listeners"))] + InvalidKafkaListeners { + source: crate::crd::listener::KafkaListenerError, + }, + + #[snafu(display("failed to build Labels"))] + LabelBuild { + source: stackable_operator::kvp::LabelError, + }, + + #[snafu(display("failed to merge pod overrides"))] + MergePodOverrides { source: crd::role::Error }, + + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to retrieve rolegroup replicas"))] + RoleGroupReplicas { source: crd::role::Error }, + + #[snafu(display( + "cluster does not define 'metadata.name' which is required for the Kafka cluster id" + ))] + ClusterIdMissing, + + #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] + VectorAggregatorConfigMapMissing, +} + +/// The broker rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. +/// +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding +/// [`Service`](`stackable_operator::k8s_openapi::api::core::v1::Service`) from [`build_rolegroup_service`](`crate::resource::service::build_rolegroup_service`). 
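+///
+/// Compared to the controller StatefulSet below, brokers additionally get a kcat prober
+/// sidecar as well as bootstrap and broker listener volumes.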
+#[allow(clippy::too_many_arguments)]
+pub fn build_broker_rolegroup_statefulset(
+    kafka: &v1alpha1::KafkaCluster,
+    kafka_role: &KafkaRole,
+    resolved_product_image: &ResolvedProductImage,
+    rolegroup_ref: &RoleGroupRef<v1alpha1::KafkaCluster>,
+    broker_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
+    opa_connect_string: Option<&str>,
+    kafka_security: &KafkaTlsSecurity,
+    merged_config: &AnyConfig,
+    service_account: &ServiceAccount,
+    cluster_info: &KubernetesClusterInfo,
+) -> Result<StatefulSet, Error> {
+    let recommended_object_labels = build_recommended_labels(
+        kafka,
+        KAFKA_CONTROLLER_NAME,
+        &resolved_product_image.app_version_label_value,
+        &rolegroup_ref.role,
+        &rolegroup_ref.role_group,
+    );
+    let recommended_labels =
+        Labels::recommended(recommended_object_labels.clone()).context(LabelBuildSnafu)?;
+    // Used for PVC templates that cannot be modified once they are deployed
+    let unversioned_recommended_labels = Labels::recommended(build_recommended_labels(
+        kafka,
+        KAFKA_CONTROLLER_NAME,
+        // A version value is required, and we do want to use the "recommended" format for the other desired labels
+        "none",
+        &rolegroup_ref.role,
+        &rolegroup_ref.role_group,
+    ))
+    .context(LabelBuildSnafu)?;
+
+    let kcat_prober_container_name = BrokerContainer::KcatProber.to_string();
+    let mut cb_kcat_prober =
+        ContainerBuilder::new(&kcat_prober_container_name).context(InvalidContainerNameSnafu {
+            name: kcat_prober_container_name.clone(),
+        })?;
+
+    let kafka_container_name = BrokerContainer::Kafka.to_string();
+    let mut cb_kafka =
+        ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu {
+            name: kafka_container_name.clone(),
+        })?;
+
+    let mut pod_builder = PodBuilder::new();
+
+    // Add TLS related volumes and volume mounts
+    let requested_secret_lifetime = merged_config
+        .deref()
+        .requested_secret_lifetime
+        .context(MissingSecretLifetimeSnafu)?;
+    kafka_security
+        .add_broker_volume_and_volume_mounts(
+            &mut pod_builder,
+            &mut cb_kcat_prober,
+            &mut cb_kafka,
+            &requested_secret_lifetime,
+        )
+        .context(AddVolumesAndVolumeMountsSnafu)?;
+
+    let mut pvcs = merged_config.resources().storage.build_pvcs();
+
+    // bootstrap listener should be persistent,
+    // main broker listener is an ephemeral PVC instead
+    pvcs.push(
+        ListenerOperatorVolumeSourceBuilder::new(
+            &ListenerReference::ListenerName(kafka.bootstrap_service_name(rolegroup_ref)),
+            &unversioned_recommended_labels,
+        )
+        .build_pvc(LISTENER_BOOTSTRAP_VOLUME_NAME)
+        .context(BuildBootstrapListenerPvcSnafu)?,
+    );
+
+    if kafka_security.has_kerberos_enabled() {
+        add_kerberos_pod_config(
+            kafka_security,
+            kafka_role,
+            &mut cb_kcat_prober,
+            &mut cb_kafka,
+            &mut pod_builder,
+        )
+        .context(AddKerberosConfigSnafu)?;
+    }
+
+    let mut env = broker_config
+        .get(&PropertyNameKind::Env)
+        .into_iter()
+        .flatten()
+        .map(|(k, v)| EnvVar {
+            name: k.clone(),
+            value: Some(v.clone()),
+            ..EnvVar::default()
+        })
+        .collect::<Vec<_>>();
+
+    if let Some(zookeeper_config_map_name) = &kafka.spec.cluster_config.zookeeper_config_map_name {
+        env.push(EnvVar {
+            name: "ZOOKEEPER".to_string(),
+            value_from: Some(EnvVarSource {
+                config_map_key_ref: Some(ConfigMapKeySelector {
+                    name: zookeeper_config_map_name.to_string(),
+                    key: "ZOOKEEPER".to_string(),
+                    ..ConfigMapKeySelector::default()
+                }),
+                ..EnvVarSource::default()
+            }),
+            ..EnvVar::default()
+        })
+    };
+
+    env.push(EnvVar {
+        name: "POD_NAME".to_string(),
+        value_from: Some(EnvVarSource {
+            field_ref: Some(ObjectFieldSelector {
+                api_version: Some("v1".to_string()),
+                field_path: "metadata.name".to_string(),
+            }),
+            ..EnvVarSource::default()
+        }),
+        ..EnvVar::default()
+    });
+
+    let kafka_listeners = get_kafka_listener_config(
+        kafka,
+        kafka_security,
+        &rolegroup_ref.object_name(),
+        cluster_info,
+    )
+    .context(InvalidKafkaListenersSnafu)?;
+
+    let cluster_id = kafka.cluster_id().context(ClusterIdMissingSnafu)?;
+
+    cb_kafka
+        .image_from_product_image(resolved_product_image)
+        .command(vec![
+            "/bin/bash".to_string(),
+            "-x".to_string(),
+            "-euo".to_string(),
+            "pipefail".to_string(),
+            "-c".to_string(),
+        ])
+        .args(vec![broker_kafka_container_commands(
+            kafka,
+            cluster_id,
+            // we need controller pods
+            kafka
+                .pod_descriptors(&KafkaRole::Controller, cluster_info)
+                .context(BuildPodDescriptorsSnafu)?,
+            &kafka_listeners,
+            opa_connect_string,
+            kafka_security,
+            &resolved_product_image.product_version,
+        )])
+        .add_env_var(
+            "EXTRA_ARGS",
+            kafka_role
+                .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group)
+                .context(ConstructJvmArgumentsSnafu)?,
+        )
+        .add_env_var(
+            KAFKA_HEAP_OPTS,
+            kafka_role
+                .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group)
+                .context(ConstructJvmArgumentsSnafu)?,
+        )
+        .add_env_var(
+            kafka_log_opts_env_var(),
+            kafka_log_opts(&resolved_product_image.product_version),
+        )
+        // Needed for the `containerdebug` process to log its tracing information to.
+        .add_env_var(
+            "CONTAINERDEBUG_LOG_DIRECTORY",
+            format!("{STACKABLE_LOG_DIR}/containerdebug"),
+        )
+        .add_env_var(
+            KAFKA_NODE_ID_OFFSET,
+            node_id_hash32_offset(rolegroup_ref).to_string(),
+        )
+        .add_env_vars(env)
+        .add_container_ports(container_ports(kafka_security))
+        .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR)
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount("config", STACKABLE_CONFIG_DIR)
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount(
+            LISTENER_BOOTSTRAP_VOLUME_NAME,
+            STACKABLE_LISTENER_BOOTSTRAP_DIR,
+        )
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR)
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR)
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount("log", STACKABLE_LOG_DIR)
+        .context(AddVolumeMountSnafu)?
+        .resources(merged_config.resources().clone().into());
+
+    // Use the kcat sidecar for probing container status rather than the official Kafka tools,
+    // since those incur a lot of unacceptable perf overhead
+    cb_kcat_prober
+        .image_from_product_image(resolved_product_image)
+        .command(vec!["sleep".to_string(), "infinity".to_string()])
+        .add_env_vars(vec![EnvVar {
+            name: "POD_NAME".to_string(),
+            value_from: Some(EnvVarSource {
+                field_ref: Some(ObjectFieldSelector {
+                    api_version: Some("v1".to_string()),
+                    field_path: "metadata.name".to_string(),
+                }),
+                ..EnvVarSource::default()
+            }),
+            ..EnvVar::default()
+        }])
+        .resources(
+            ResourceRequirementsBuilder::new()
+                .with_cpu_request("100m")
+                .with_cpu_limit("200m")
+                .with_memory_request("128Mi")
+                .with_memory_limit("128Mi")
+                .build(),
+        )
+        .add_volume_mount(
+            LISTENER_BOOTSTRAP_VOLUME_NAME,
+            STACKABLE_LISTENER_BOOTSTRAP_DIR,
+        )
+        .context(AddVolumeMountSnafu)?
+        .add_volume_mount(LISTENER_BROKER_VOLUME_NAME, STACKABLE_LISTENER_BROKER_DIR)
+        .context(AddVolumeMountSnafu)?
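+        // the listener volume mounts above let the probe read the advertised addresses it checks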
+ // Only allow the global load balancing service to send traffic to pods that are members of the quorum + // This also acts as a hint to the StatefulSet controller to wait for each pod to enter quorum before taking down the next + .readiness_probe(Probe { + exec: Some(ExecAction { + // If the broker is able to get its fellow cluster members then it has at least completed basic registration at some point + command: Some(kafka_security.kcat_prober_container_commands()), + }), + timeout_seconds: Some(5), + period_seconds: Some(2), + ..Probe::default() + }); + + if let ContainerLogConfig { + choice: + Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { + custom: ConfigMapLogConfig { config_map }, + })), + } = &*merged_config.kafka_logging() + { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(config_map) + .build(), + ) + .context(AddVolumeSnafu)?; + } else { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(rolegroup_ref.object_name()) + .build(), + ) + .context(AddVolumeSnafu)?; + } + + let metadata = ObjectMetaBuilder::new() + .with_recommended_labels(recommended_object_labels) + .context(MetadataBuildSnafu)? + .build(); + + if let Some(listener_class) = merged_config.listener_class() { + pod_builder + .add_listener_volume_by_listener_class( + LISTENER_BROKER_VOLUME_NAME, + listener_class, + &recommended_labels, + ) + .context(AddListenerVolumeSnafu)?; + } + pod_builder + .metadata(metadata) + .image_pull_secrets_from_product_image(resolved_product_image) + .add_container(cb_kafka.build()) + .add_container(cb_kcat_prober.build()) + .affinity(&merged_config.affinity) + .add_volume(Volume { + name: "config".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: rolegroup_ref.object_name(), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }) + .context(AddVolumeSnafu)? + // bootstrap volume is a persistent volume template instead, to keep addresses persistent + .add_empty_dir_volume( + "log", + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_KAFKA_LOG_FILES_SIZE], + )), + ) + .context(AddVolumeSnafu)? 
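+        // the "log" emptyDir above is sized from MAX_KAFKA_LOG_FILES_SIZE by the
+        // product-logging framework helper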
+ .service_account_name(service_account.name_any()) + .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); + + // Add vector container after kafka container to keep the defaulting into kafka container + if merged_config.vector_logging_enabled() { + match &kafka.spec.cluster_config.vector_aggregator_config_map_name { + Some(vector_aggregator_config_map_name) => { + pod_builder.add_container( + product_logging::framework::vector_container( + resolved_product_image, + "config", + "log", + Some(&*merged_config.vector_logging()), + ResourceRequirementsBuilder::new() + .with_cpu_request("250m") + .with_cpu_limit("500m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + vector_aggregator_config_map_name, + ) + .context(ConfigureLoggingSnafu)?, + ); + } + None => { + VectorAggregatorConfigMapMissingSnafu.fail()?; + } + } + } + + add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; + + let mut pod_template = pod_builder.build_template(); + + let pod_template_spec = pod_template.spec.get_or_insert_with(PodSpec::default); + // Don't run kcat pod as PID 1, to ensure that default signal handlers apply + pod_template_spec.share_process_namespace = Some(true); + + pod_template.merge_from( + kafka_role + .role_pod_overrides(kafka) + .context(MergePodOverridesSnafu)?, + ); + pod_template.merge_from( + kafka_role + .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) + .context(MergePodOverridesSnafu)?, + ); + + Ok(StatefulSet { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup_ref.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + spec: Some(StatefulSetSpec { + pod_management_policy: Some("Parallel".to_string()), + replicas: kafka_role + .replicas(kafka, &rolegroup_ref.role_group) + .context(RoleGroupReplicasSnafu)? + .map(i32::from), + selector: LabelSelector { + match_labels: Some( + Labels::role_group_selector( + kafka, + APP_NAME, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ) + .context(LabelBuildSnafu)? + .into(), + ), + ..LabelSelector::default() + }, + service_name: Some(rolegroup_ref.object_name()), + template: pod_template, + volume_claim_templates: Some(pvcs), + ..StatefulSetSpec::default() + }), + status: None, + }) +} + +/// The controller rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. 
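+///
+/// Controllers run no kcat prober sidecar and mount no listener volumes; they also get a
+/// pre-stop sleep so that brokers can offload data before the quorum shrinks (see below).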
+#[allow(clippy::too_many_arguments)]
+pub fn build_controller_rolegroup_statefulset(
+    kafka: &v1alpha1::KafkaCluster,
+    kafka_role: &KafkaRole,
+    resolved_product_image: &ResolvedProductImage,
+    rolegroup_ref: &RoleGroupRef<v1alpha1::KafkaCluster>,
+    controller_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
+    kafka_security: &KafkaTlsSecurity,
+    merged_config: &AnyConfig,
+    service_account: &ServiceAccount,
+    cluster_info: &KubernetesClusterInfo,
+) -> Result<StatefulSet, Error> {
+    let recommended_object_labels = build_recommended_labels(
+        kafka,
+        KAFKA_CONTROLLER_NAME,
+        &resolved_product_image.app_version_label_value,
+        &rolegroup_ref.role,
+        &rolegroup_ref.role_group,
+    );
+
+    let kafka_container_name = ControllerContainer::Kafka.to_string();
+    let mut cb_kafka =
+        ContainerBuilder::new(&kafka_container_name).context(InvalidContainerNameSnafu {
+            name: kafka_container_name.clone(),
+        })?;
+
+    let mut pod_builder = PodBuilder::new();
+
+    let mut env = controller_config
+        .get(&PropertyNameKind::Env)
+        .into_iter()
+        .flatten()
+        .map(|(k, v)| EnvVar {
+            name: k.clone(),
+            value: Some(v.clone()),
+            ..EnvVar::default()
+        })
+        .collect::<Vec<_>>();
+
+    env.push(EnvVar {
+        name: "NAMESPACE".to_string(),
+        value_from: Some(EnvVarSource {
+            field_ref: Some(ObjectFieldSelector {
+                api_version: Some("v1".to_string()),
+                field_path: "metadata.namespace".to_string(),
+            }),
+            ..EnvVarSource::default()
+        }),
+        ..EnvVar::default()
+    });
+
+    env.push(EnvVar {
+        name: "POD_NAME".to_string(),
+        value_from: Some(EnvVarSource {
+            field_ref: Some(ObjectFieldSelector {
+                api_version: Some("v1".to_string()),
+                field_path: "metadata.name".to_string(),
+            }),
+            ..EnvVarSource::default()
+        }),
+        ..EnvVar::default()
+    });
+
+    env.push(EnvVar {
+        name: "ROLEGROUP_REF".to_string(),
+        value: Some(rolegroup_ref.object_name()),
+        ..EnvVar::default()
+    });
+
+    env.push(EnvVar {
+        name: "CLUSTER_DOMAIN".to_string(),
+        value: Some(cluster_info.cluster_domain.to_string()),
+        ..EnvVar::default()
+    });
+
+    let kafka_listeners = get_kafka_listener_config(
+        kafka,
+        kafka_security,
+        &rolegroup_ref.object_name(),
+        cluster_info,
+    )
+    .context(InvalidKafkaListenersSnafu)?;
+
+    cb_kafka
+        .image_from_product_image(resolved_product_image)
+        .command(vec![
+            "/bin/bash".to_string(),
+            "-x".to_string(),
+            "-euo".to_string(),
+            "pipefail".to_string(),
+            "-c".to_string(),
+        ])
+        .args(vec![controller_kafka_container_command(
+            kafka.cluster_id().context(ClusterIdMissingSnafu)?,
+            kafka
+                .pod_descriptors(kafka_role, cluster_info)
+                .context(BuildPodDescriptorsSnafu)?,
+            &kafka_listeners,
+            kafka_security,
+            &resolved_product_image.product_version,
+        )])
+        .add_env_var(
+            "EXTRA_ARGS",
+            kafka_role
+                .construct_non_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group)
+                .context(ConstructJvmArgumentsSnafu)?,
+        )
+        .add_env_var(
+            KAFKA_HEAP_OPTS,
+            kafka_role
+                .construct_heap_jvm_args(merged_config, kafka, &rolegroup_ref.role_group)
+                .context(ConstructJvmArgumentsSnafu)?,
+        )
+        .add_env_var(
+            kafka_log_opts_env_var(),
+            kafka_log_opts(&resolved_product_image.product_version),
+        )
+        // Needed for the `containerdebug` process to log its tracing information to.
+        .add_env_var(
+            "CONTAINERDEBUG_LOG_DIRECTORY",
+            format!("{STACKABLE_LOG_DIR}/containerdebug"),
+        )
+        .add_env_var(
+            KAFKA_NODE_ID_OFFSET,
+            node_id_hash32_offset(rolegroup_ref).to_string(),
+        )
+        .add_env_vars(env)
+        .add_container_ports(container_ports(kafka_security))
+        .add_volume_mount(LOG_DIRS_VOLUME_NAME, STACKABLE_DATA_DIR)
+        .context(AddVolumeMountSnafu)?
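+        // the data dir mounted above also holds the KRaft metadata log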
+ .add_volume_mount("config", STACKABLE_CONFIG_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log-config", STACKABLE_LOG_CONFIG_DIR) + .context(AddVolumeMountSnafu)? + .add_volume_mount("log", STACKABLE_LOG_DIR) + .context(AddVolumeMountSnafu)? + .resources(merged_config.resources().clone().into()) + // TODO: improve probes + .liveness_probe(Probe { + tcp_socket: Some(TCPSocketAction { + port: IntOrString::Int(kafka_security.client_port().into()), + ..Default::default() + }), + timeout_seconds: Some(5), + period_seconds: Some(5), + ..Probe::default() + }) + .readiness_probe(Probe { + tcp_socket: Some(TCPSocketAction { + port: IntOrString::Int(kafka_security.client_port().into()), + ..Default::default() + }), + timeout_seconds: Some(5), + period_seconds: Some(5), + ..Probe::default() + }); + + if let ContainerLogConfig { + choice: + Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { + custom: ConfigMapLogConfig { config_map }, + })), + } = &*merged_config.kafka_logging() + { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(config_map) + .build(), + ) + .context(AddVolumeSnafu)?; + } else { + pod_builder + .add_volume( + VolumeBuilder::new("log-config") + .with_config_map(rolegroup_ref.object_name()) + .build(), + ) + .context(AddVolumeSnafu)?; + } + + let metadata = ObjectMetaBuilder::new() + .with_recommended_labels(recommended_object_labels) + .context(MetadataBuildSnafu)? + .build(); + + // Add TLS related volumes and volume mounts + let requested_secret_lifetime = merged_config + .deref() + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; + kafka_security + .add_controller_volume_and_volume_mounts( + &mut pod_builder, + &mut cb_kafka, + &requested_secret_lifetime, + ) + .context(AddVolumesAndVolumeMountsSnafu)?; + + // Currently, Controllers shutdown very fast, too fast in most times (flakyness) for the Brokers + // to off load properly. The Brokers then try to connect to any controllers until the + // `gracefulShutdownTimeout` is reached and the pod is finally killed. + // The `pre-stop` hook will delay the kill signal to the Controllers to provide the Brokers more + // time to offload data. + let mut kafka_container = cb_kafka.build(); + kafka_container.lifecycle = Some(Lifecycle { + pre_stop: Some(LifecycleHandler { + sleep: Some(SleepAction { seconds: 10 }), + ..Default::default() + }), + ..Default::default() + }); + + pod_builder + .metadata(metadata) + .image_pull_secrets_from_product_image(resolved_product_image) + .add_container(kafka_container) + .affinity(&merged_config.affinity) + .add_volume(Volume { + name: "config".to_string(), + config_map: Some(ConfigMapVolumeSource { + name: rolegroup_ref.object_name(), + ..ConfigMapVolumeSource::default() + }), + ..Volume::default() + }) + .context(AddVolumeSnafu)? + // bootstrap volume is a persistent volume template instead, to keep addresses persistent + .add_empty_dir_volume( + "log", + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_KAFKA_LOG_FILES_SIZE], + )), + ) + .context(AddVolumeSnafu)? 
+ .service_account_name(service_account.name_any()) + .security_context(PodSecurityContextBuilder::new().fs_group(1000).build()); + + // Add vector container after kafka container to keep the defaulting into kafka container + if merged_config.vector_logging_enabled() { + match &kafka.spec.cluster_config.vector_aggregator_config_map_name { + Some(vector_aggregator_config_map_name) => { + pod_builder.add_container( + product_logging::framework::vector_container( + resolved_product_image, + "config", + "log", + Some(&*merged_config.vector_logging()), + ResourceRequirementsBuilder::new() + .with_cpu_request("250m") + .with_cpu_limit("500m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + vector_aggregator_config_map_name, + ) + .context(ConfigureLoggingSnafu)?, + ); + } + None => { + VectorAggregatorConfigMapMissingSnafu.fail()?; + } + } + } + + add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; + + let mut pod_template = pod_builder.build_template(); + + pod_template.merge_from( + kafka_role + .role_pod_overrides(kafka) + .context(MergePodOverridesSnafu)?, + ); + pod_template.merge_from( + kafka_role + .role_group_pod_overrides(kafka, &rolegroup_ref.role_group) + .context(MergePodOverridesSnafu)?, + ); + + Ok(StatefulSet { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(kafka) + .name(rolegroup_ref.object_name()) + .ownerreference_from_resource(kafka, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + kafka, + KAFKA_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(MetadataBuildSnafu)? + .build(), + spec: Some(StatefulSetSpec { + pod_management_policy: Some("Parallel".to_string()), + update_strategy: Some(StatefulSetUpdateStrategy { + type_: Some("RollingUpdate".to_string()), + ..StatefulSetUpdateStrategy::default() + }), + replicas: kafka_role + .replicas(kafka, &rolegroup_ref.role_group) + .context(RoleGroupReplicasSnafu)? + .map(i32::from), + selector: LabelSelector { + match_labels: Some( + Labels::role_group_selector( + kafka, + APP_NAME, + &rolegroup_ref.role, + &rolegroup_ref.role_group, + ) + .context(LabelBuildSnafu)? + .into(), + ), + ..LabelSelector::default() + }, + service_name: Some(rolegroup_ref.object_name()), + template: pod_template, + volume_claim_templates: Some(merged_config.resources().storage.build_pvcs()), + ..StatefulSetSpec::default() + }), + status: None, + }) +} + +/// We only expose client HTTP / HTTPS and Metrics ports. 
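+/// Mirrors `listener_ports` in `resource/listener.rs`; with Kerberos enabled the
+/// bootstrap port is exposed as well.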
+fn container_ports(kafka_security: &KafkaTlsSecurity) -> Vec { + let mut ports = vec![ + ContainerPort { + name: Some(METRICS_PORT_NAME.to_string()), + container_port: METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }, + ContainerPort { + name: Some(kafka_security.client_port_name().to_string()), + container_port: kafka_security.client_port().into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }, + ]; + if kafka_security.has_kerberos_enabled() { + ports.push(ContainerPort { + name: Some(kafka_security.bootstrap_port_name().to_string()), + container_port: kafka_security.bootstrap_port().into(), + protocol: Some("TCP".to_string()), + ..ContainerPort::default() + }); + } + ports +} diff --git a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 index 08f1661a..2c8a1532 100644 --- a/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/30-stop-kafka.yaml.j2 @@ -21,9 +21,6 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: true - reconciliationPaused: false brokers: config: logging: @@ -31,3 +28,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: true + reconciliationPaused: false diff --git a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 index c961ca4d..18682f32 100644 --- a/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/40-pause-kafka.yaml.j2 @@ -21,9 +21,6 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: false - reconciliationPaused: true brokers: config: logging: @@ -31,3 +28,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: false + reconciliationPaused: true diff --git a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 index 81d43637..bece9335 100644 --- a/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/cluster-operation/50-restart-kafka.yaml.j2 @@ -20,9 +20,6 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} zookeeperConfigMapName: test-zk - clusterOperation: - stopped: false - reconciliationPaused: false brokers: config: logging: @@ -30,3 +27,6 @@ spec: roleGroups: default: replicas: 1 + clusterOperation: + stopped: false + reconciliationPaused: false diff --git a/tests/templates/kuttl/configuration/20-assert.yaml.j2 b/tests/templates/kuttl/configuration/10-assert.yaml.j2 similarity index 94% rename from tests/templates/kuttl/configuration/20-assert.yaml.j2 rename to tests/templates/kuttl/configuration/10-assert.yaml.j2 index f3f09708..3de5ea66 100644 --- a/tests/templates/kuttl/configuration/20-assert.yaml.j2 +++ b/tests/templates/kuttl/configuration/10-assert.yaml.j2 @@ -17,12 +17,12 @@ spec: # value set in the role configuration cpu: 500m # value set in the rolegroup configuration - memory: 2Gi + memory: 3Gi requests: # default value set by the operator cpu: 250m # value set in the rolegroup configuration - memory: 2Gi + memory: 3Gi - name: kcat-prober {% if lookup('env', 'VECTOR_AGGREGATOR') %} - name: vector diff --git 
a/tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 b/tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 similarity index 59% rename from tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 rename to tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 index eefa7550..af9f97e8 100644 --- a/tests/templates/kuttl/configuration/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/configuration/10-install-kafka.yaml.j2 @@ -18,10 +18,10 @@ spec: pullPolicy: IfNotPresent clusterConfig: {% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} - zookeeperConfigMapName: test-zk - brokers: + controllers: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} @@ -37,7 +37,7 @@ spec: storage: logDirs: # Override the default value '2Gi' set by the operator - capacity: '1Gi' + capacity: '1.5Gi' roleGroups: default: config: @@ -50,6 +50,40 @@ spec: memory: # Override the default value '1Gi' set by the operator limit: '2Gi' + storage: + logDirs: + # Override the value '1.5Gi' set in the role configuration + capacity: '1Gi' + replicas: 1 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + resources: + cpu: + # Inherit the default value '250m' set by the operator + # min: '250m' + # Override the default value '1' set by the operator + max: '500m' + # memory: + # Inherit the default value '2Gi' set by the operator + # limit: '2Gi' + storage: + logDirs: + # Override the default value '2Gi' set by the operator + capacity: '1.5Gi' + roleGroups: + default: + config: + resources: + # cpu: + # Inherit the default value '250m' set by the operator + # min: '250m' + # Inherit the value '1000m' set in the role configuration + # max: '1000m' + memory: + # Override the default value '2Gi' set by the operator + limit: '3Gi' storage: logDirs: # Override the value '2Gi' set in the role configuration diff --git a/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 b/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 deleted file mode 100644 index b402b25d..00000000 --- a/tests/templates/kuttl/configuration/10-install-zk.yaml.j2 +++ /dev/null @@ -1,24 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -timeout: 300 ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperCluster -metadata: - name: test-zk -spec: - image: - productVersion: "{{ test_scenario['values']['zookeeper-latest'] }}" - pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - clusterConfig: - vectorAggregatorConfigMapName: vector-aggregator-discovery -{% endif %} - servers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - roleGroups: - default: - replicas: 1 diff --git a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 index be8651b3..854c3734 100644 --- a/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/02-install-kafka.yaml.j2 @@ -23,6 +23,7 @@ spec: zookeeperConfigMapName: test-zk brokers: config: + gracefulShutdownTimeout: 30s # speed up tests logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: diff --git a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 index 3c7824a8..38853bd6 100644 
--- a/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 +++ b/tests/templates/kuttl/delete-rolegroup/03-delete-secondary.yaml.j2 @@ -22,6 +22,7 @@ spec: zookeeperConfigMapName: test-zk brokers: config: + gracefulShutdownTimeout: 30s # speed up tests logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: diff --git a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 index a03d03ed..0099c2b2 100644 --- a/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/kerberos/20-install-kafka.yaml.j2 @@ -36,7 +36,6 @@ commands: {% endif %} pullPolicy: IfNotPresent clusterConfig: - zookeeperConfigMapName: test-kafka-znode authentication: - authenticationClass: kerberos-auth-$NAMESPACE tls: @@ -45,12 +44,14 @@ commands: {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} + zookeeperConfigMapName: test-kafka-znode brokers: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} brokerListenerClass: {{ test_scenario['values']['broker-listener-class'] }} bootstrapListenerClass: {{ test_scenario['values']['bootstrap-listener-class'] }} + gracefulShutdownTimeout: 30s # speed up tests roleGroups: default: replicas: 3 diff --git a/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml b/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 similarity index 60% rename from tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml rename to tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 index df71fa4d..38a2a86b 100644 --- a/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml +++ b/tests/templates/kuttl/logging/03-create-configmap-with-prepared-logs.yaml.j2 @@ -4,5 +4,9 @@ kind: TestStep commands: - script: > kubectl create configmap prepared-logs +{% if test_scenario['values']['kafka'].startswith('3.') %} --from-file=prepared-logs.log4j.xml +{% else %} + --from-file=prepared-logs.log4j2.xml +{% endif %} --namespace=$NAMESPACE diff --git a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 index e83fc50b..42588924 100644 --- a/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/logging/04-install-kafka.yaml.j2 @@ -4,6 +4,7 @@ kind: ConfigMap metadata: name: kafka-log-config data: +{% if test_scenario['values']['kafka'].startswith('3.') %} log4j.properties: | log4j.rootLogger=INFO, CONSOLE, FILE @@ -18,6 +19,19 @@ data: log4j.appender.FILE.MaxFileSize=5MB log4j.appender.FILE.MaxBackupIndex=1 log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout +{% else %} + log4j2.properties: |- + appenders = FILE + + appender.FILE.type = File + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/kafka/kafka.log4j2.xml + appender.FILE.layout.type = XMLLayout + + rootLogger.level=INFO + rootLogger.appenderRefs = FILE + rootLogger.appenderRef.FILE.ref = FILE +{% endif %} --- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster diff --git a/tests/templates/kuttl/logging/prepared-logs.log4j2.xml b/tests/templates/kuttl/logging/prepared-logs.log4j2.xml new file mode 100644 index 00000000..37474797 --- /dev/null +++ b/tests/templates/kuttl/logging/prepared-logs.log4j2.xml @@ -0,0 +1,72 @@ + + + Valid log event with all possible tags and attributes + + + + + + + + + + 
Valid log event without the Instant tag + + + + + Invalid log event without epochSecond + + + + + Invalid log event without nanoOfSecond + + + + + Invalid log event with invalid epochSecond + + + + Invalid log event without a timestamp + + + + Invalid log event with invalid timeMillis + + + + + Invalid log event without a logger + + + + + Invalid log event without a level + + + + + Invalid log event with an unknown level + + + + + + + + + + Invalid log event without the Event tag + + + + Unparsable log event + + + + + Valid log event after the unparsable one + diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index ebc38cd8..c27b1992 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -4,9 +4,9 @@ def check_sent_events(): response = requests.post( - 'http://kafka-vector-aggregator:8686/graphql', + "http://kafka-vector-aggregator:8686/graphql", json={ - 'query': """ + "query": """ { transforms(first:100) { nodes { @@ -20,29 +20,30 @@ def check_sent_events(): } } """ - } + }, ) - assert response.status_code == 200, \ - 'Cannot access the API of the vector aggregator.' + assert response.status_code == 200, ( + "Cannot access the API of the vector aggregator." + ) result = response.json() - transforms = result['data']['transforms']['nodes'] + transforms = result["data"]["transforms"]["nodes"] for transform in transforms: - sentEvents = transform['metrics']['sentEventsTotal'] - componentId = transform['componentId'] + sentEvents = transform["metrics"]["sentEventsTotal"] + componentId = transform["componentId"] - if componentId == 'filteredInvalidEvents': - assert sentEvents is None or \ - sentEvents['sentEventsTotal'] == 0, \ - 'Invalid log events were sent.' + if componentId == "filteredInvalidEvents": + assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, ( + "Invalid log events were sent." + ) else: - assert sentEvents is not None and \ - sentEvents['sentEventsTotal'] > 0, \ - f'No events were sent in "{componentId}".' + assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, ( + f'No events were sent in "{componentId}".' 
+ ) -if __name__ == '__main__': +if __name__ == "__main__": check_sent_events() - print('Test successful!') + print("Test successful!") diff --git a/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 b/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 new file mode 100644 index 00000000..67185acf --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/00-patch-ns.yaml.j2 @@ -0,0 +1,9 @@ +{% if test_scenario['values']['openshift'] == 'true' %} +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}' + timeout: 120 +{% endif %} diff --git a/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 new file mode 100644 index 00000000..6ac28598 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/20-assert.yaml.j2 @@ -0,0 +1,22 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + readyReplicas: 3 + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 new file mode 100644 index 00000000..655d26a5 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 @@ -0,0 +1,36 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 
'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 new file mode 100644 index 00000000..9ba36657 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/25-assert.yaml.j2 @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=reconciliationPaused kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + replicas: 3 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 3 diff --git a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 new file mode 100644 index 00000000..0e851efa --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 @@ -0,0 +1,39 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" +{% endif %} + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + controllers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + brokers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 3 + clusterOperation: + stopped: false + reconciliationPaused: true diff --git a/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 new file mode 100644 index 00000000..eba45c7c --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/30-assert.yaml.j2 @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=stopped kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-broker-default +status: + replicas: 0 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-kafka-controller-default +status: + replicas: 0 diff --git a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 new file mode 100644 index 00000000..70e9f713 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 @@ -0,0 +1,39 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 300 +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: test-kafka +spec: + image: +{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %} + custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}" + productVersion: "{{ 
+{% else %}
+    productVersion: "{{ test_scenario['values']['kafka-kraft'] }}"
+{% endif %}
+    pullPolicy: IfNotPresent
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+  clusterConfig:
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+  brokers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  controllers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  clusterOperation:
+    stopped: true
+    reconciliationPaused: false
diff --git a/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2
new file mode 100644
index 00000000..ff21429a
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/50-assert.yaml.j2
@@ -0,0 +1,22 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 300
+commands:
+  - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-default
+status:
+  readyReplicas: 3
+  replicas: 3
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-controller-default
+status:
+  readyReplicas: 3
+  replicas: 3
diff --git a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2
new file mode 100644
index 00000000..758b9e04
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2
@@ -0,0 +1,38 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 300
+---
+apiVersion: kafka.stackable.tech/v1alpha1
+kind: KafkaCluster
+metadata:
+  name: test-kafka
+spec:
+  image:
+{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}"
+{% else %}
+    productVersion: "{{ test_scenario['values']['kafka-kraft'] }}"
+{% endif %}
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+  clusterConfig:
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+  controllers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  brokers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  clusterOperation:
+    stopped: false
+    reconciliationPaused: false
diff --git a/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2
new file mode 100644
index 00000000..9209b5ea
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/60-assert.yaml.j2
@@ -0,0 +1,24 @@
+{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %}
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 300
+commands:
+  - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-default
+status:
+  readyReplicas: 3
+  replicas: 3
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-controller-default
+status:
+  readyReplicas: 5
+  replicas: 5
+{% endif %}
diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2
new file mode 100644
index 00000000..718c760e
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2
@@ -0,0 +1,40 @@
+{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %}
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 300
+---
+apiVersion: kafka.stackable.tech/v1alpha1
+kind: KafkaCluster
+metadata:
+  name: test-kafka
+spec:
+  image:
+{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}"
+{% else %}
+    productVersion: "{{ test_scenario['values']['kafka-kraft'] }}"
+{% endif %}
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+  clusterConfig:
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+  controllers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 5
+  brokers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  clusterOperation:
+    stopped: false
+    reconciliationPaused: false
+{% endif %}
diff --git a/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2
new file mode 100644
index 00000000..d8da05ff
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/70-assert.yaml.j2
@@ -0,0 +1,24 @@
+{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %}
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 300
+commands:
+  - script: kubectl -n $NAMESPACE wait --for=condition=available kafkaclusters.kafka.stackable.tech/test-kafka --timeout 301s
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-default
+status:
+  readyReplicas: 3
+  replicas: 3
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-controller-default
+status:
+  readyReplicas: 3
+  replicas: 3
+{% endif %}
diff --git a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2
new file mode 100644
index 00000000..a8073228
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2
@@ -0,0 +1,40 @@
+{% if not test_scenario['values']['kafka-kraft'].startswith("3.7") %}
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+timeout: 300
+---
+apiVersion: kafka.stackable.tech/v1alpha1
+kind: KafkaCluster
+metadata:
+  name: test-kafka
+spec:
+  image:
+{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}"
+{% else %}
+    productVersion: "{{ test_scenario['values']['kafka-kraft'] }}"
+{% endif %}
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+  clusterConfig:
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+  controllers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  brokers:
+    config:
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+    roleGroups:
+      default:
+        replicas: 3
+  clusterOperation:
+    stopped: false
+    reconciliationPaused: false
+{% endif %}
diff --git a/tests/templates/kuttl/operations-kraft/README.md b/tests/templates/kuttl/operations-kraft/README.md
new file mode 100644
index 00000000..5c0fa86b
--- /dev/null
+++ b/tests/templates/kuttl/operations-kraft/README.md
@@ -0,0 +1,14 @@
+Tests KRaft cluster operations:
+
+- Cluster stop/pause/restart
+- Scale brokers up/down
+- Scale controllers up/down
+
+Notes:
+
+- Kafka 3.7 controllers do not scale at all.
+  The scaling test steps are disabled for this version.
+- Scaling controllers from 3 -> 1 doesn't work:
+  both brokers and controllers keep trying to reach the old controllers.
+  That is why the last step scales from 5 -> 3 controllers,
+  which at least leaves the cluster in a working state.
diff --git a/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml b/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml
new file mode 100644
index 00000000..7b6cb30e
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/00-limit-range.yaml
@@ -0,0 +1,11 @@
+---
+apiVersion: v1
+kind: LimitRange
+metadata:
+  name: limit-request-ratio
+spec:
+  limits:
+    - type: "Container"
+      maxLimitRequestRatio:
+        cpu: 5
+        memory: 1
diff --git a/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2 b/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2
new file mode 100644
index 00000000..67185acf
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/00-patch-ns.yaml.j2
@@ -0,0 +1,9 @@
+{% if test_scenario['values']['openshift'] == 'true' %}
+# see https://github.com/stackabletech/issues/issues/566
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}'
+    timeout: 120
+{% endif %}
diff --git a/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2 b/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2
new file mode 100644
index 00000000..7ee61d23
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/00-rbac.yaml.j2
@@ -0,0 +1,29 @@
+---
+kind: Role
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: test-role
+rules:
+{% if test_scenario['values']['openshift'] == "true" %}
+  - apiGroups: ["security.openshift.io"]
+    resources: ["securitycontextconstraints"]
+    resourceNames: ["privileged"]
+    verbs: ["use"]
+{% endif %}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: test-sa
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: test-rb
+subjects:
+  - kind: ServiceAccount
+    name: test-sa
+roleRef:
+  kind: Role
+  name: test-role
+  apiGroup: rbac.authorization.k8s.io
diff --git a/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2 b/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2
new file mode 100644
index 00000000..892ae718
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/01-assert.yaml.j2
@@ -0,0 +1,8 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vector-aggregator-discovery
diff --git a/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml b/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml
new file mode 100644
index 00000000..6c7b01cc
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/01-install-kafka-vector-aggregator.yaml
@@ -0,0 +1,17 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: >-
+      helm install kafka-vector-aggregator vector
+      --namespace $NAMESPACE
+      --version 0.43.0
+      --repo https://helm.vector.dev
+      --values kafka-vector-aggregator-values.yaml
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vector-aggregator-discovery
+data:
+  ADDRESS: kafka-vector-aggregator:6123
diff --git a/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2 b/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2
new file mode 100644
index 00000000..11b3d373
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/03-create-configmap-with-prepared-logs.yaml.j2
@@ -0,0 +1,12 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: >
+      kubectl create configmap prepared-logs
+{% if test_scenario['values']['kafka-kraft'].startswith('3.') %}
+      --from-file=prepared-logs.log4j.xml
+{% else %}
+      --from-file=prepared-logs.log4j2.xml
+{% endif %}
+      --namespace=$NAMESPACE
diff --git a/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2 b/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2
new file mode 100644
index 00000000..02f55756
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/30-assert.yaml.j2
@@ -0,0 +1,44 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 600
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-automatic-log-config
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-broker-custom-log-config
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-controller-automatic-log-config
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: test-kafka-controller-custom-log-config
+status:
+  readyReplicas: 1
+  replicas: 1
diff --git a/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2
new file mode 100644
index 00000000..282686e9
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2
@@ -0,0 +1,200 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kafka-log-config
+data:
+{% if test_scenario['values']['kafka-kraft'].startswith('3.') %}
+  log4j.properties: |
+    log4j.rootLogger=INFO, CONSOLE, FILE
+
+    log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+    log4j.appender.CONSOLE.Threshold=INFO
+    log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+    log4j.appender.CONSOLE.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+    log4j.appender.FILE=org.apache.log4j.RollingFileAppender
+    log4j.appender.FILE.Threshold=INFO
+    log4j.appender.FILE.File=/stackable/log/kafka/kafka.log4j.xml
+    log4j.appender.FILE.MaxFileSize=5MB
+    log4j.appender.FILE.MaxBackupIndex=1
+    log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout
+{% else %}
+  log4j2.properties: |-
+    appenders = FILE
+
+    appender.FILE.type = File
+    appender.FILE.name = FILE
+    appender.FILE.fileName = /stackable/log/kafka/kafka.log4j2.xml
+    appender.FILE.layout.type = XMLLayout
+
+    rootLogger.level=INFO
+    rootLogger.appenderRefs = FILE
+    rootLogger.appenderRef.FILE.ref = FILE
+{% endif %}
+---
+apiVersion: authentication.stackable.tech/v1alpha1
+kind: AuthenticationClass
+metadata:
+  name: test-kafka-client-auth-tls
+spec:
+  provider:
+    tls:
+      clientCertSecretClass: test-kafka-client-auth-tls
+---
+apiVersion: secrets.stackable.tech/v1alpha1
+kind: SecretClass
+metadata:
+  name: test-kafka-client-auth-tls
+spec:
+  backend:
+    autoTls:
+      ca:
+        secret:
+          name: secret-provisioner-tls-kafka-client-auth-ca
+          namespace: default
+        autoGenerate: true
+---
+apiVersion: kafka.stackable.tech/v1alpha1
+kind: KafkaCluster
+metadata:
+  name: test-kafka
+spec:
+  image:
+{% if test_scenario['values']['kafka-kraft'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['kafka-kraft'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['kafka-kraft'].split(',')[0] }}"
+{% else %}
+    productVersion: "{{ test_scenario['values']['kafka-kraft'] }}"
+{% endif %}
+    pullPolicy: IfNotPresent
+  clusterConfig:
+    authentication:
+      - authenticationClass: test-kafka-client-auth-tls
+    tls:
+      serverSecretClass: tls
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+  controllers:
+    envOverrides:
+      COMMON_VAR: role-value # overridden by role group below
+      ROLE_VAR: role-value # only defined here at role level
+    config:
+      logging:
+        enableVectorAgent: true
+      requestedSecretLifetime: 7d
+    roleGroups:
+      automatic-log-config:
+        replicas: 1
+        config:
+          logging:
+            enableVectorAgent: true
+            containers:
+              kafka:
+                console:
+                  level: INFO
+                file:
+                  level: INFO
+                loggers:
+                  ROOT:
+                    level: INFO
+              vector:
+                console:
+                  level: INFO
+                file:
+                  level: INFO
+                loggers:
+                  ROOT:
+                    level: INFO
+        podOverrides:
+          spec:
+            containers:
+              - name: vector
+                volumeMounts:
+                  - name: prepared-logs
+                    mountPath: /stackable/log/prepared-logs
+            volumes:
+              - name: prepared-logs
+                configMap:
+                  name: prepared-logs
+      custom-log-config:
+        replicas: 1
+        config:
+          logging:
+            enableVectorAgent: true
+            containers:
+              kafka:
+                custom:
+                  configMap: kafka-log-config
+  brokers:
+    configOverrides:
+      broker.properties:
+        compression.type: uncompressed # overridden by role group below
+        controller.quorum.election.backoff.max.ms: "2000"
+    envOverrides:
+      COMMON_VAR: role-value # overridden by role group below
+      ROLE_VAR: role-value # only defined here at role level
+    config:
+      logging:
+        enableVectorAgent: true
+      requestedSecretLifetime: 7d
+    roleGroups:
+      default:
+        replicas: 1
+        envOverrides:
+          COMMON_VAR: group-value # overrides role value
+          GROUP_VAR: group-value # only defined here at group level
+        configOverrides:
+          broker.properties:
+            compression.type: snappy
+            controller.quorum.fetch.timeout.ms: "3000"
+        podOverrides:
+          spec:
+            containers:
+              - name: kafka
+                resources:
+                  requests:
+                    cpu: 300m
+                  limits:
+                    cpu: 1100m
+      automatic-log-config:
+        replicas: 1
+        config:
+          logging:
+            enableVectorAgent: true
+            containers:
+              kafka:
+                console:
+                  level: INFO
+                file:
+                  level: INFO
+                loggers:
+                  ROOT:
+                    level: INFO
+              vector:
+                console:
+                  level: INFO
+                file:
+                  level: INFO
+                loggers:
+                  ROOT:
+                    level: INFO
+        podOverrides:
+          spec:
+            containers:
+              - name: vector
+                volumeMounts:
+                  - name: prepared-logs
+                    mountPath: /stackable/log/prepared-logs
+            volumes:
+              - name: prepared-logs
+                configMap:
+                  name: prepared-logs
+      custom-log-config:
+        replicas: 1
+        config:
+          logging:
+            enableVectorAgent: true
+            containers:
+              kafka:
+                custom:
+                  configMap: kafka-log-config
diff --git a/tests/templates/kuttl/smoke-kraft/31-assert.yaml b/tests/templates/kuttl/smoke-kraft/31-assert.yaml
new file mode 100644
index 00000000..26a55394
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/31-assert.yaml
@@ -0,0 +1,19 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 600
+commands:
+  #
+  # Test envOverrides
+  #
+  - script: |
+      kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "COMMON_VAR" and .value == "group-value")'
+      kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "GROUP_VAR" and .value == "group-value")'
+      kubectl -n $NAMESPACE get sts test-kafka-broker-default -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "kafka") | .env[] | select (.name == "ROLE_VAR" and .value == "role-value")'
+  #
+  # Test configOverrides
+  #
+  - script: |
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "compression.type=snappy"
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.election.backoff.max.ms=2000"
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.fetch.timeout.ms=3000"
diff --git a/tests/templates/kuttl/smoke-kraft/32-assert.yaml b/tests/templates/kuttl/smoke-kraft/32-assert.yaml
new file mode 100644
index 00000000..32d3ca11
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/32-assert.yaml
@@ -0,0 +1,7 @@
+---
+# This test checks if the containerdebug-state.json file is present and valid
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 600
+commands:
+  - script: kubectl exec -n $NAMESPACE --container kafka test-kafka-broker-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"'
diff --git a/tests/templates/kuttl/upgrade/01-assert.yaml b/tests/templates/kuttl/smoke-kraft/40-assert.yaml
similarity index 73%
rename from tests/templates/kuttl/upgrade/01-assert.yaml
rename to tests/templates/kuttl/smoke-kraft/40-assert.yaml
index c9cfcf5c..58987778 100644
--- a/tests/templates/kuttl/upgrade/01-assert.yaml
+++ b/tests/templates/kuttl/smoke-kraft/40-assert.yaml
@@ -1,12 +1,14 @@
 ---
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
+metadata:
+  name: install-test-container
 timeout: 300
 ---
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
-  name: test-zk-server-default
+  name: python
 status:
   readyReplicas: 1
   replicas: 1
diff --git a/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml b/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml
new file mode 100644
index 00000000..d2d79cc7
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/40-install-test-container.yaml
@@ -0,0 +1,35 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+metadata:
+  name: install-test-container
+timeout: 300
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: python
+  labels:
+    app: python
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: python
+  template:
+    metadata:
+      labels:
+        app: python
+    spec:
+      containers:
+        - name: webhdfs
+          image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev
+          stdin: true
+          tty: true
+          resources:
+            requests:
+              memory: "128Mi"
+              cpu: "512m"
+            limits:
+              memory: "128Mi"
+              cpu: "1"
diff --git a/tests/templates/kuttl/smoke-kraft/50-assert.yaml b/tests/templates/kuttl/smoke-kraft/50-assert.yaml
new file mode 100644
index 00000000..7cb89e3d
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/50-assert.yaml
@@ -0,0 +1,9 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+metadata:
+  name: metrics
+commands:
+  - script: kubectl exec -n $NAMESPACE python-0 -- python /tmp/metrics.py
+  - script: kubectl exec -n $NAMESPACE test-kafka-broker-default-0 -- /tmp/test_heap.sh
+  - script: kubectl exec -n $NAMESPACE python-0 -- python /tmp/test_log_aggregation.py -n $NAMESPACE
diff --git a/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml b/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml
new file mode 100644
index 00000000..6895678f
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/50-prepare-test.yaml
@@ -0,0 +1,9 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+metadata:
+  name: kafka-tests
+commands:
+  - script: kubectl cp -n $NAMESPACE ./metrics.py python-0:/tmp
+  - script: kubectl cp -n $NAMESPACE ./test_heap.sh test-kafka-broker-default-0:/tmp
+  - script: kubectl cp -n $NAMESPACE ./test_log_aggregation.py python-0:/tmp
diff --git a/tests/templates/kuttl/smoke-kraft/60-assert.yaml b/tests/templates/kuttl/smoke-kraft/60-assert.yaml
new file mode 100644
index 00000000..828b4be9
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/60-assert.yaml
@@ -0,0 +1,8 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: test-scripts
diff --git a/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml b/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml
new file mode 100644
index 00000000..959bbce5
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/60-install-test-scripts-configmap.yaml
@@ -0,0 +1,10 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: |
+      kubectl create configmap test-scripts \
+        --namespace $NAMESPACE \
+        --from-file=test_client_auth_tls.sh=60_test_client_auth_tls.sh \
+        --from-file=wrong_keystore.p12=60_wrong_keystore.p12 \
+        --from-file=wrong_truststore.p12=60_wrong_truststore.p12
diff --git a/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh b/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh
new file mode 100755
index 00000000..bae7473b
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/60_test_client_auth_tls.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# Usage: test_client_auth_tls.sh namespace
+
+# to be safe
+unset TOPIC
+unset BAD_TOPIC
+
+echo "Connecting to bootstrap address $KAFKA"
+
+echo "Start client auth TLS testing..."
+############################################################################
+# Test the secured connection
+############################################################################
+# create random topics
+TOPIC=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 13)
+BAD_TOPIC=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 13)
+
+# the keystore/truststore paths match the tls-pkcs12 secret volumes mounted by the test Job
+echo $'security.protocol=SSL\nssl.keystore.location=/stackable/tls_keystore_server/keystore.p12\nssl.keystore.password=changeit\nssl.truststore.location=/stackable/tls_keystore_server/truststore.p12\nssl.truststore.password=changeit' > /tmp/client.config
+
+if /stackable/kafka/bin/kafka-topics.sh --create --topic "$TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config
+then
+  echo "[SUCCESS] Secure client topic created!"
+else
+  echo "[ERROR] Secure client topic creation failed!"
+  exit 1
+fi
+
+if /stackable/kafka/bin/kafka-topics.sh --list --topic "$TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config | grep "$TOPIC"
+then
+  echo "[SUCCESS] Secure client topic read!"
+else
+  echo "[ERROR] Secure client topic read failed!"
+  exit 1
+fi
+
+############################################################################
+# Test the connection without certificates
+############################################################################
+if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server "$KAFKA" &> /dev/null
+then
+  echo "[ERROR] Secure client topic created without certificates!"
+  exit 1
+else
+  echo "[SUCCESS] Secure client topic creation failed without certificates!"
+fi
+
+############################################################################
+# Test the connection with bad host name
+############################################################################
+if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server localhost:9093 --command-config /tmp/client.config &> /dev/null
+then
+  echo "[ERROR] Secure client topic created with bad host name!"
+  exit 1
+else
+  echo "[SUCCESS] Secure client topic creation failed with bad host name!"
+fi
+
+############################################################################
+# Test the connection with bad certificate
+############################################################################
+echo $'security.protocol=SSL\nssl.keystore.location=/tmp/wrong_keystore.p12\nssl.keystore.password=changeit\nssl.truststore.location=/tmp/wrong_truststore.p12\nssl.truststore.password=changeit' > /tmp/client.config
+if /stackable/kafka/bin/kafka-topics.sh --create --topic "$BAD_TOPIC" --bootstrap-server "$KAFKA" --command-config /tmp/client.config &> /dev/null
+then
+  echo "[ERROR] Secure client topic created with wrong certificate!"
+  exit 1
+else
+  echo "[SUCCESS] Secure client topic creation failed with wrong certificate!"
+fi
+
+echo "All client auth TLS tests successful!"
+exit 0
diff --git a/tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12 b/tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12
new file mode 100644
index 00000000..e5dc3a42
Binary files /dev/null and b/tests/templates/kuttl/smoke-kraft/60_wrong_keystore.p12 differ
diff --git a/tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12 b/tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12
new file mode 100644
index 00000000..0eca7262
Binary files /dev/null and b/tests/templates/kuttl/smoke-kraft/60_wrong_truststore.p12 differ
diff --git a/tests/templates/kuttl/smoke-kraft/70-assert.yaml b/tests/templates/kuttl/smoke-kraft/70-assert.yaml
new file mode 100644
index 00000000..52cbcc7b
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/70-assert.yaml
@@ -0,0 +1,11 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 600
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: test-tls
+status:
+  succeeded: 1
diff --git a/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2 b/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2
new file mode 100644
index 00000000..22b3e3f2
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/70-run-tests.yaml.j2
@@ -0,0 +1,13 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  # Use the same Kafka image in the test Job as in the Kafka broker StatefulSet
+  - script: >-
+      KAFKA_IMAGE=$(
+      kubectl get statefulsets.apps test-kafka-broker-default
+      --namespace $NAMESPACE
+      --output=jsonpath='{.spec.template.spec.containers[?(.name=="kafka")].image}'
+      )
+      envsubst < 70_test-tls-job.yaml |
+      kubectl apply --namespace $NAMESPACE --filename -
diff --git a/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2 b/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2
new file mode 100644
index 00000000..f9da65e1
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/70_test-tls-job.yaml.j2
@@ -0,0 +1,73 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: test-tls
+spec:
+  template:
+    spec:
+      containers:
+        - name: kafka
+          image: ${KAFKA_IMAGE}
+          workingDir: /stackable/test
+          command:
+            - ./test_client_auth_tls.sh
+          resources:
+            requests:
+              cpu: 200m
+              memory: 512Mi
+            limits:
+              cpu: 200m
+              memory: 512Mi
+          env:
+            - name: KAFKA
+              valueFrom:
+                configMapKeyRef:
+                  name: test-kafka
+                  key: KAFKA
+          volumeMounts:
+            - name: test-scripts
+              mountPath: /stackable/test
+            - mountPath: /stackable/tls_keystore_internal
+              name: tls-keystore-internal
+            - mountPath: /stackable/tls_keystore_server
+              name: tls-keystore-server
+      volumes:
+        - name: test-scripts
+          configMap:
+            name: test-scripts
+            defaultMode: 0777
+        - name: tls-keystore-server
+          ephemeral:
+            volumeClaimTemplate:
+              metadata:
+                annotations:
+                  secrets.stackable.tech/class: test-kafka-client-auth-tls
+                  secrets.stackable.tech/format: tls-pkcs12
+                  secrets.stackable.tech/scope: pod,node
+              spec:
+                storageClassName: secrets.stackable.tech
+                accessModes:
+                  - ReadWriteOnce
+                resources:
+                  requests:
+                    storage: "1"
+        - name: tls-keystore-internal
+          ephemeral:
+            volumeClaimTemplate:
+              metadata:
+                annotations:
+                  secrets.stackable.tech/class: tls
+                  secrets.stackable.tech/format: tls-pkcs12
+                  secrets.stackable.tech/scope: pod,node
+              spec:
+                storageClassName: secrets.stackable.tech
+                accessModes:
+                  - ReadWriteOnce
+                resources:
+                  requests:
+                    storage: "1"
+      securityContext:
+        fsGroup: 1000
+      serviceAccountName: test-sa
+      restartPolicy: OnFailure
diff --git a/tests/templates/kuttl/smoke-kraft/README.md b/tests/templates/kuttl/smoke-kraft/README.md
new file mode 100644
index 00000000..319d8f04
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/README.md
@@ -0,0 +1,9 @@
+# KRaft test bundle
+
+To reduce the number of tests, this one ("smoke-kraft") bundles multiple tests into one:
+
+* smoke
+* logging
+* tls (always enabled)
+
+This test doesn't install any ZooKeeper servers and runs only in KRaft mode (as the name implies).
diff --git a/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2
new file mode 100644
index 00000000..67eed310
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/kafka-vector-aggregator-values.yaml.j2
@@ -0,0 +1,104 @@
+---
+role: Aggregator
+service:
+  ports:
+    - name: api
+      port: 8686
+      protocol: TCP
+      targetPort: 8686
+    - name: vector
+      port: 6123
+      protocol: TCP
+      targetPort: 6000
+# resources -- Set Vector resource requests and limits.
+resources:
+  requests:
+    cpu: 200m
+    memory: 512Mi
+  limits:
+    cpu: 200m
+    memory: 512Mi
+customConfig:
+  api:
+    address: 0.0.0.0:8686
+    enabled: true
+  sources:
+    vector:
+      address: 0.0.0.0:6000
+      type: vector
+      version: "2"
+  transforms:
+    validEvents:
+      type: filter
+      inputs: [vector]
+      condition: is_null(.errors)
+    filteredAutomaticLogConfigBrokerKafka:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-broker-automatic-log-config-0" &&
+        .container == "kafka"
+    filteredAutomaticLogConfigBrokerVector:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-broker-automatic-log-config-0" &&
+        .container == "vector"
+    filteredCustomLogConfigBrokerKafka:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-broker-custom-log-config-0" &&
+        .container == "kafka"
+    filteredCustomLogConfigBrokerVector:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-broker-custom-log-config-0" &&
+        .container == "vector"
+    filteredAutomaticLogConfigControllerKafka:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-controller-automatic-log-config-0" &&
+        .container == "kafka"
+    filteredAutomaticLogConfigControllerVector:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-controller-automatic-log-config-0" &&
+        .container == "vector"
+    filteredCustomLogConfigControllerKafka:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-controller-custom-log-config-0" &&
+        .container == "kafka"
+    filteredCustomLogConfigControllerVector:
+      type: filter
+      inputs: [validEvents]
+      condition: >-
+        .pod == "test-kafka-controller-custom-log-config-0" &&
+        .container == "vector"
+    filteredInvalidEvents:
+      type: filter
+      inputs: [vector]
+      condition: |-
+        .timestamp == from_unix_timestamp!(0) ||
+        is_null(.level) ||
+        is_null(.logger) ||
+        is_null(.message)
+  sinks:
+    test:
+      inputs: [filtered*]
+      type: blackhole
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+    aggregator:
+      inputs: [vector]
+      type: vector
+      address: {{ lookup('env', 'VECTOR_AGGREGATOR') }}
+      buffer:
+        # Avoid back pressure from VECTOR_AGGREGATOR. The test should
+        # not fail if the aggregator is not available.
+        when_full: drop_newest
+{% endif %}
diff --git a/tests/templates/kuttl/smoke-kraft/metrics.py b/tests/templates/kuttl/smoke-kraft/metrics.py
new file mode 100644
index 00000000..7c9f8027
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/metrics.py
@@ -0,0 +1,19 @@
+import sys
+import logging
+import requests
+
+if __name__ == "__main__":
+    result = 0
+
+    LOG_LEVEL = "DEBUG"  # if args.debug else 'INFO'
+    logging.basicConfig(
+        level=LOG_LEVEL,
+        format="%(asctime)s %(levelname)s: %(message)s",
+        stream=sys.stdout,
+    )
+
+    http_code = requests.get("http://test-kafka-broker-default:9606").status_code
+    if http_code != 200:
+        result = 1
+
+    sys.exit(result)
diff --git a/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml
new file mode 100644
index 00000000..707d10c9
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j.xml
@@ -0,0 +1,38 @@
+<log4j:event logger="testLogger" timestamp="1700000000000" level="INFO" thread="main">
+  <log4j:message>Valid log event with all possible tags and attributes</log4j:message>
+  <log4j:throwable>TestException</log4j:throwable>
+</log4j:event>
+
+<log4j:event logger="testLogger" level="INFO" thread="main">
+  <log4j:message>Invalid log event without a timestamp</log4j:message>
+</log4j:event>
+
+<log4j:event logger="testLogger" timestamp="unparsable" level="INFO" thread="main">
+  <log4j:message>Invalid log event with an unparsable timestamp</log4j:message>
+</log4j:event>
+
+<log4j:event timestamp="1700000000000" level="INFO" thread="main">
+  <log4j:message>Invalid log event without a logger</log4j:message>
+</log4j:event>
+
+<log4j:event logger="testLogger" timestamp="1700000000000" thread="main">
+  <log4j:message>Invalid log event without a level</log4j:message>
+</log4j:event>
+
+<log4j:event logger="testLogger" timestamp="1700000000000" level="FOOBAR" thread="main">
+  <log4j:message>Invalid log event with an unknown level</log4j:message>
+</log4j:event>
+
+<log4j:event logger="testLogger" timestamp="1700000000000" level="INFO" thread="main">
+  <log4j:message>Valid log event before the one with the noevent tag</log4j:message>
+</log4j:event>
+
+<log4j:noevent logger="testLogger" timestamp="1700000000000" level="INFO" thread="main">
+  <log4j:message>Invalid log event without the event tag</log4j:message>
+</log4j:noevent>
+
+Unparsable log event
+
+<log4j:event logger="testLogger" timestamp="1700000000000" level="INFO" thread="main">
+  <log4j:message>Valid log event after the unparsable one</log4j:message>
+</log4j:event>
diff --git a/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml
new file mode 100644
index 00000000..1562bce9
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/prepared-logs.log4j2.xml
@@ -0,0 +1,54 @@
+<Event level="INFO" loggerName="testLogger" thread="main">
+  <Instant epochSecond="1700000000" nanoOfSecond="0"/>
+  <Message>Valid log event with all possible tags and attributes</Message>
+  <Thrown name="TestException" message="TestException"/>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main" timeMillis="1700000000000">
+  <Message>Valid log event without the Instant tag</Message>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main">
+  <Instant nanoOfSecond="0"/>
+  <Message>Invalid log event without epochSecond</Message>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main">
+  <Instant epochSecond="1700000000"/>
+  <Message>Invalid log event without nanoOfSecond</Message>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main">
+  <Instant epochSecond="unparsable" nanoOfSecond="0"/>
+  <Message>Invalid log event with invalid epochSecond</Message>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main">
+  <Message>Invalid log event without a timestamp</Message>
+</Event>
+
+<Event level="INFO" loggerName="testLogger" thread="main" timeMillis="unparsable">
+  <Message>Invalid log event with invalid timeMillis</Message>
+</Event>
+
+<Event level="INFO" thread="main" timeMillis="1700000000000">
+  <Message>Invalid log event without a logger</Message>
+</Event>
+
+<Event loggerName="testLogger" thread="main" timeMillis="1700000000000">
+  <Message>Invalid log event without a level</Message>
+</Event>
+
+<Event level="FOOBAR" loggerName="testLogger" thread="main" timeMillis="1700000000000">
+  <Message>Invalid log event with an unknown level</Message>
+</Event>
+
+<NoEvent level="INFO" loggerName="testLogger" thread="main" timeMillis="1700000000000">
+  <Message>Invalid log event without the Event tag</Message>
+</NoEvent>
+
+Unparsable log event
+
+<Event level="INFO" loggerName="testLogger" thread="main" timeMillis="1700000000000">
+  <Message>Valid log event after the unparsable one</Message>
+</Event>
diff --git a/tests/templates/kuttl/smoke-kraft/test_heap.sh b/tests/templates/kuttl/smoke-kraft/test_heap.sh
new file mode 100755
index 00000000..cd76d42a
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/test_heap.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Usage: test_heap.sh
+
+# 2Gi * 0.8 -> 1638
+EXPECTED_HEAP="-Xmx1638m -Xms1638m"
+
+# Check if KAFKA_HEAP_OPTS is set to the correct calculated value
+if [[ $KAFKA_HEAP_OPTS == "$EXPECTED_HEAP" ]]
+then
+  echo "[SUCCESS] KAFKA_HEAP_OPTS set to $EXPECTED_HEAP"
+else
+  echo "[ERROR] KAFKA_HEAP_OPTS not set or set with wrong value: $KAFKA_HEAP_OPTS"
+  exit 1
+fi
+
+echo "[SUCCESS] All heap settings tests successful!"
diff --git a/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py b/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py
new file mode 100755
index 00000000..c27b1992
--- /dev/null
+++ b/tests/templates/kuttl/smoke-kraft/test_log_aggregation.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+import requests
+
+
+def check_sent_events():
+    response = requests.post(
+        "http://kafka-vector-aggregator:8686/graphql",
+        json={
+            "query": """
+            {
+                transforms(first:100) {
+                    nodes {
+                        componentId
+                        metrics {
+                            sentEventsTotal {
+                                sentEventsTotal
+                            }
+                        }
+                    }
+                }
+            }
+            """
+        },
+    )
+
+    assert response.status_code == 200, (
+        "Cannot access the API of the vector aggregator."
+    )
+
+    result = response.json()
+
+    transforms = result["data"]["transforms"]["nodes"]
+    for transform in transforms:
+        sentEvents = transform["metrics"]["sentEventsTotal"]
+        componentId = transform["componentId"]
+
+        if componentId == "filteredInvalidEvents":
+            assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, (
+                "Invalid log events were sent."
+            )
+        else:
+            assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, (
+                f'No events were sent in "{componentId}".'
+            )
+
+
+if __name__ == "__main__":
+    check_sent_events()
+    print("Test successful!")
diff --git a/tests/templates/kuttl/configuration/10-assert.yaml b/tests/templates/kuttl/smoke/20-assert.yaml.j2
similarity index 100%
rename from tests/templates/kuttl/configuration/10-assert.yaml
rename to tests/templates/kuttl/smoke/20-assert.yaml.j2
diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2
index 1520c7dc..3049df31 100644
--- a/tests/templates/kuttl/smoke/30-assert.yaml.j2
+++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2
@@ -15,10 +15,10 @@ spec:
           resources:
             limits:
               cpu: 1100m # From podOverrides
-              memory: 1Gi
+              memory: 2Gi
             requests:
               cpu: 300m # From podOverrides
-              memory: 1Gi
+              memory: 2Gi
         - name: kcat-prober
           resources:
             limits:
@@ -60,8 +60,8 @@ spec:
         annotations:
           secrets.stackable.tech/backend.autotls.cert.lifetime: 7d
     - name: log-config
-    - name: config
    - name: listener-broker
+    - name: config
     - name: log
 status:
   readyReplicas: 1
diff --git a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2
index 9b5f2482..4f3b95a0 100644
--- a/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2
+++ b/tests/templates/kuttl/smoke/30-install-kafka.yaml.j2
@@ -29,7 +29,7 @@ spec:
     zookeeperConfigMapName: test-zk
   brokers:
     configOverrides:
-      server.properties:
+      broker.properties:
        compression.type: uncompressed # overridden by role group below
        controller.quorum.election.backoff.max.ms: "2000"
     envOverrides:
@@ -46,7 +46,7 @@ spec:
          COMMON_VAR: group-value # overrides role value
          GROUP_VAR: group-value # only defined here at group level
        configOverrides:
-          server.properties:
+          broker.properties:
            compression.type: snappy
            controller.quorum.fetch.timeout.ms: "3000"
        podOverrides:
diff --git a/tests/templates/kuttl/smoke/31-assert.yaml b/tests/templates/kuttl/smoke/31-assert.yaml
index 63804de1..26a55394 100644
--- a/tests/templates/kuttl/smoke/31-assert.yaml
+++ b/tests/templates/kuttl/smoke/31-assert.yaml
@@ -14,6 +14,6 @@ commands:
   #
   # Test configOverrides
   #
   - script: |
-      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "compression.type=snappy"
-      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "controller.quorum.election.backoff.max.ms=2000"
-      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."server.properties"' | grep "controller.quorum.fetch.timeout.ms=3000"
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "compression.type=snappy"
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.election.backoff.max.ms=2000"
+      kubectl -n $NAMESPACE get cm test-kafka-broker-default -o yaml | yq -e '.data."broker.properties"' | grep "controller.quorum.fetch.timeout.ms=3000"
diff --git a/tests/templates/kuttl/smoke/metrics.py b/tests/templates/kuttl/smoke/metrics.py
index 8afffe6b..7c9f8027 100644
--- a/tests/templates/kuttl/smoke/metrics.py
+++ b/tests/templates/kuttl/smoke/metrics.py
@@ -5,8 +5,12 @@
 if __name__ == "__main__":
     result = 0
 
-    LOG_LEVEL = 'DEBUG'  # if args.debug else 'INFO'
-    logging.basicConfig(level=LOG_LEVEL, format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stdout)
+    LOG_LEVEL = "DEBUG"  # if args.debug else 'INFO'
+    logging.basicConfig(
+        level=LOG_LEVEL,
+        format="%(asctime)s %(levelname)s: %(message)s",
+        stream=sys.stdout,
+    )
 
     http_code = requests.get("http://test-kafka-broker-default:9606").status_code
     if http_code != 200:
diff --git a/tests/templates/kuttl/smoke/test_heap.sh b/tests/templates/kuttl/smoke/test_heap.sh
index 71193850..cd76d42a 100755
--- a/tests/templates/kuttl/smoke/test_heap.sh
+++ b/tests/templates/kuttl/smoke/test_heap.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
 # Usage: test_heap.sh
 
-# 1Gi * 0.8 -> 819
-EXPECTED_HEAP="-Xmx819m -Xms819m"
+# 2Gi * 0.8 -> 1638
+EXPECTED_HEAP="-Xmx1638m -Xms1638m"
 
 # Check if ZK_SERVER_HEAP is set to the correct calculated value
 if [[ $KAFKA_HEAP_OPTS == "$EXPECTED_HEAP" ]]
diff --git a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2 b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2
index 54a7a49d..da660f7f 100644
--- a/tests/templates/kuttl/tls/20-install-kafka.yaml.j2
+++ b/tests/templates/kuttl/tls/20-install-kafka.yaml.j2
@@ -63,11 +63,6 @@ spec:
     config:
       logging:
        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
-      resources:
-        # There are some test failures that kill the container due to error 137 (not enough memory)
-        # using the default (currently 1Gi), when running the tls test scripts.
-        memory:
-          limit: '1.3Gi'
     roleGroups:
       default:
         replicas: 3
diff --git a/tests/templates/kuttl/smoke/20-assert.yaml b/tests/templates/kuttl/upgrade/01-assert.yaml.j2
similarity index 100%
rename from tests/templates/kuttl/smoke/20-assert.yaml
rename to tests/templates/kuttl/upgrade/01-assert.yaml.j2
diff --git a/tests/templates/kuttl/upgrade/02-assert.yaml b/tests/templates/kuttl/upgrade/02-assert.yaml.j2
similarity index 100%
rename from tests/templates/kuttl/upgrade/02-assert.yaml
rename to tests/templates/kuttl/upgrade/02-assert.yaml.j2
diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2
index b7b1ec7d..afd99b60 100644
--- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2
+++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2
@@ -53,9 +53,10 @@ spec:
 {% if lookup('env', 'VECTOR_AGGREGATOR') %}
     vectorAggregatorConfigMapName: vector-aggregator-discovery
 {% endif %}
-    zookeeperConfigMapName: test-kafka-znode
+    zookeeperConfigMapName: test-zk
   brokers:
     config:
+      gracefulShutdownTimeout: 30s # speed up tests
      logging:
        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
    roleGroups:
diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml
index da16182f..f3c1315d 100644
--- a/tests/test-definition.yaml
+++ b/tests/test-definition.yaml
@@ -4,10 +4,14 @@
 #
 ---
 dimensions:
+  - name: kafka-kraft
+    values:
+      - 3.7.2
+      - 3.9.1
+      - 4.1.0
   - name: kafka
     values:
       - 3.7.2
-      - 3.9.0
       - 3.9.1
       # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version
       # as in the example below.
@@ -63,6 +67,14 @@ dimensions:
       - "external-stable"
       - "external-unstable"
 tests:
+  - name: operations-kraft
+    dimensions:
+      - kafka-kraft
+      - openshift
+  - name: smoke-kraft
+    dimensions:
+      - kafka-kraft
+      - openshift
   - name: smoke
     dimensions:
       - kafka
@@ -72,7 +84,6 @@ tests:
   - name: configuration
     dimensions:
       - kafka-latest
-      - zookeeper-latest
       - openshift
   - name: upgrade
     dimensions: