diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml
index b148a7326..a1cd34ffb 100644
--- a/.github/workflows/run_tests.yaml
+++ b/.github/workflows/run_tests.yaml
@@ -68,6 +68,19 @@ jobs:
minikube image load operator.tar
minikube image load metrics-exporter.tar
+ - name: Deploy prometheus
+ run: |
+ cp ./deploy/prometheus/prometheus-sensitive-data.example.sh ./deploy/prometheus/prometheus-sensitive-data.sh
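+          # the committed example sensitive-data file is enough for CI; NO_WAIT=1 is assumed to let the script return without waiting for the rollout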
+ NO_WAIT=1 bash ./deploy/prometheus/create-prometheus.sh
+
+ - name: Deploy minio
+ run: |
+ NO_WAIT=1 bash ./deploy/minio/create-minio.sh
+
+ - name: Deploy zookeeper-operator
+ run: |
+ bash ./deploy/zookeeper/zookeeper-with-zookeeper-operator/install-zookeeper-operator.sh
+
- name: Run Tests
id: run-tests
continue-on-error: true
@@ -95,7 +108,7 @@ jobs:
test_mode="--test-to-end"
fi
- ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --native --log ./tests/raw.log
+ ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --no-colors --native --log ./tests/raw.log
test_result=$?
~/venv/qa/bin/tfs --no-colors transform compact ./tests/raw.log ./tests/compact.log
~/venv/qa/bin/tfs --no-colors transform nice ./tests/raw.log ./tests/nice.log.txt
@@ -123,7 +136,7 @@ jobs:
retention-days: 90
- name: Test Failed
- if: ${{ failure() }}
+ if: ${{ steps.run-tests.outputs.test_result != '0' }}
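+        # the Run Tests step uses continue-on-error, so gate on the captured exit code instead of failure()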
uses: actions/github-script@v3
with:
script: |
diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml
new file mode 100644
index 000000000..659e73d90
--- /dev/null
+++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml
@@ -0,0 +1,662 @@
+# The following manifests are based on https://github.com/pravega/zookeeper-operator/
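+# 1-node variant for tests; scale-down/teardown is handled by the preStop hook (zookeeperTeardown.sh below), hence "manual-teardown"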
+---
+# service for REST administration and reconfiguration
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-admin-server
+spec:
+ type: ClusterIP
+ ports:
+ - name: tcp-admin-server
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
+ selector:
+ app: zookeeper
+
+---
+# service for clickhouse zookeeper client connections
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper
+spec:
+ type: ClusterIP
+ ports:
+ - name: tcp-client
+ port: 2181
+ protocol: TCP
+ targetPort: 2181
+ selector:
+ app: zookeeper
+ what: node
+---
+# headless service for Zookeeper Quorum Election and service name
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-headless
+spec:
+ type: ClusterIP
+ clusterIP: None
+ publishNotReadyAddresses: false
+ ports:
+ - name: tcp-client
+ port: 2181
+ protocol: TCP
+ targetPort: 2181
+ - name: tcp-quorum
+ port: 2888
+ protocol: TCP
+ targetPort: 2888
+ - name: tcp-leader-election
+ port: 3888
+ protocol: TCP
+ targetPort: 3888
+ - name: tcp-metrics
+ port: 7000
+ protocol: TCP
+ targetPort: 7000
+ - name: tcp-admin-server
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
+ selector:
+ app: zookeeper
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ name: zookeeper
+spec:
+ selector:
+ matchLabels:
+ app: zookeeper
+ maxUnavailable: 1
+---
+# ConfigMap with common startup scripts and base config
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-scripts
+data:
+ env.sh: |
+ #!/usr/bin/env bash
+ export DOMAIN=`hostname -d`
+ export QUORUM_PORT=2888
+ export LEADER_PORT=3888
+ export CLIENT_HOST=zookeeper
+ export CLIENT_PORT=2181
+ export ADMIN_SERVER_HOST=zookeeper-admin-server
+ export ADMIN_SERVER_PORT=8080
+ export CLUSTER_NAME=zookeeper
+ export ZOO_LOG4J_PROP="WARN, CONSOLE"
+ java.env: |
+ JVMFLAGS="-Xms128M -Xmx4G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50"
+ log4j-quiet.properties: |
+ log4j.rootLogger=CONSOLE
+ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+ log4j.appender.CONSOLE.Threshold=ERROR
+ log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+ log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+ log4j.properties: |
+ zookeeper.root.logger=CONSOLE
+ zookeeper.console.threshold=INFO
+ log4j.rootLogger=${zookeeper.root.logger}
+ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+ log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
+ log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+ log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+  logback.xml: |
+    <configuration>
+      <property name="zookeeper.console.threshold" value="INFO" />
+      <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+          <pattern>%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n</pattern>
+        </encoder>
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+          <level>${zookeeper.console.threshold}</level>
+        </filter>
+      </appender>
+      <root level="INFO">
+        <appender-ref ref="CONSOLE" />
+      </root>
+    </configuration>
+ zoo.cfg: |
+ 4lw.commands.whitelist=*
+ dataDir=/var/lib/zookeeper/data
+ dataLogDir=/var/lib/zookeeper/datalog
+ standaloneEnabled=false
+ reconfigEnabled=true
+ skipACL=yes
+ metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
+ metricsProvider.httpPort=7000
+ metricsProvider.exportJvmInfo=true
+ tickTime=2000
+ initLimit=300
+ syncLimit=10
+ maxClientCnxns=2000
+ maxTimeToWaitForEpoch=2000
+ globalOutstandingLimit=1000
+ preAllocSize=131072
+ snapCount=1500000
+ commitLogCount=500
+ snapSizeLimitInKb=4194304
+ maxCnxns=0
+ minSessionTimeout=4000
+ maxSessionTimeout=40000
+ autopurge.snapRetainCount=3
+ autopurge.purgeInterval=1
+ quorumListenOnAllIPs=false
+ admin.serverPort=8080
+ dynamicConfigFile=/var/lib/zookeeper/zoo.cfg.dynamic
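+    # reconfigEnabled and dynamicConfigFile above let zookeeperStart.sh / zookeeperTeardown.sh manage ensemble membership at runtime via "reconfig"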
+ zookeeperFunctions.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ function zkConfig() {
+ echo "$HOST.$DOMAIN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT"
+ }
+ function zkConnectionString() {
+ # If the client service address is not yet available, then return localhost
+ set +e
+ getent hosts "${CLIENT_HOST}" 2>/dev/null 1>/dev/null
+ if [[ $? -ne 0 ]]; then
+ set -e
+ echo "localhost:${CLIENT_PORT}"
+ else
+ set -e
+ echo "${CLIENT_HOST}:${CLIENT_PORT}"
+ fi
+ }
+ zookeeperStart.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+
+ set -ex
+ # TODO think how to add nslookup to docker image
+ # apt-get update && apt-get install --no-install-recommends -y dnsutils procps
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ HOST=`hostname -s`
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+ LOG4J_CONF=/conf/log4j-quiet.properties
+ DYNCONFIG=$VOLUME_DIR/zoo.cfg.dynamic
+ STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg
+
+ if [[ ! -d "$VOLUME_DIR/data" ]]; then
+ mkdir -p $VOLUME_DIR/data
+ chown zookeeper $VOLUME_DIR/data
+ fi
+ if [[ ! -d "$VOLUME_DIR/datalog" ]]; then
+ mkdir -p $VOLUME_DIR/datalog
+ chown zookeeper $VOLUME_DIR/datalog
+ fi
+
+    # Extract the resource name and this member's ordinal value from the pod hostname
+ if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
+ NAME=${BASH_REMATCH[1]}
+ ORD=${BASH_REMATCH[2]}
+ else
+ echo Failed to parse name and ordinal of Pod
+ exit 1
+ fi
+
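+    # ZooKeeper server ids are 1-based, while StatefulSet pod ordinals start at 0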
+ MYID=$((ORD+1))
+
+ # Values for first startup
+ WRITE_CONFIGURATION=true
+ REGISTER_NODE=true
+ ONDISK_MYID_CONFIG=false
+ ONDISK_DYN_CONFIG=false
+
+ # Check validity of on-disk configuration
+ if [ -f $MYID_FILE ]; then
+ EXISTING_ID="`cat $MYID_FILE`"
+ if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then
+ # If Id is correct and configuration is present under `/var/lib/zookeeper/conf`
+ ONDISK_MYID_CONFIG=true
+ fi
+ fi
+
+ if [ -f $DYNCONFIG ]; then
+ ONDISK_DYN_CONFIG=true
+ fi
+
+ # Determine if there is an ensemble available to join by checking the service domain
+ set +e
+ getent hosts $DOMAIN # This only performs a dns lookup
+ if [[ $? -eq 0 ]]; then
+ ACTIVE_ENSEMBLE=true
+ else
+ ACTIVE_ENSEMBLE=false
+ fi
+ # elif nslookup $DOMAIN | grep -q "server can't find $DOMAIN"; then
+ # echo "there is no active ensemble"
+ # ACTIVE_ENSEMBLE=false
+ # else
+ # If an nslookup of the headless service domain fails, then there is no
+ # active ensemble yet, but in certain cases nslookup of headless service
+ # takes a while to come up even if there is active ensemble
+ # ACTIVE_ENSEMBLE=false
+ # declare -i count=10
+ # while [[ $count -ge 0 ]]
+ # do
+ # sleep 1
+ # ((count=count-1))
+ # getent hosts $DOMAIN
+ # if [[ $? -eq 0 ]]; then
+ # ACTIVE_ENSEMBLE=true
+ # break
+ # fi
+ # done
+ # fi
+
+ if [[ "$ONDISK_MYID_CONFIG" == true && "$ONDISK_DYN_CONFIG" == true ]]; then
+      # If configuration is present, we assume there is no need to write it again.
+ WRITE_CONFIGURATION=false
+ else
+ WRITE_CONFIGURATION=true
+ fi
+
+ if [[ "$ACTIVE_ENSEMBLE" == false ]]; then
+      # This is the first node being added to the cluster, or the headless service is not yet available
+ REGISTER_NODE=false
+ else
+ # If ensemble exists, check to see if this node is already a member.
+ if [[ "$ONDISK_MYID_CONFIG" == false || "$ONDISK_DYN_CONFIG" == false ]]; then
+ REGISTER_NODE=true
+ elif [[ -f "$STATIC_CONFIG" ]]; then
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ if [[ "0" == $(grep -c "server.${MYID}=" "${DYN_CFG_FILE}") ]]; then
+ REGISTER_NODE=true
+ else
+ REGISTER_NODE=false
+ fi
+ else
+ REGISTER_NODE=false
+ fi
+ fi
+
+ if [[ "$WRITE_CONFIGURATION" == true ]]; then
+ echo "Writing myid: $MYID to: $MYID_FILE."
+ echo $MYID > $MYID_FILE
+ if [[ $MYID -eq 1 ]]; then
+ ROLE=participant
+ echo Initial initialization of ordinal 0 pod, creating new config.
+ ZKCONFIG=$(zkConfig)
+ echo Writing bootstrap configuration with the following config:
+ echo $ZKCONFIG
+ echo $MYID > $MYID_FILE
+ echo "server.${MYID}=${ZKCONFIG}" > $DYNCONFIG
+ fi
+ fi
+
+ export ZOOCFGDIR=/var/lib/zookeeper/conf
+ if [[ "$REGISTER_NODE" == true ]]; then
+ ROLE=observer
+ ZKURL=$(zkConnectionString)
+ ZKCONFIG=$(zkConfig)
+
+ set -e
+ echo Registering node and writing local configuration to disk.
+ zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" | grep -E '^server\.[0-9]+=' > $DYNCONFIG
+ if ls $ZOOCFGDIR/zoo.cfg.dynamic.* 1> /dev/null 2>&1; then
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ cp -fv $DYNCONFIG $DYN_CFG_FILE
+ fi
+ set +e
+ fi
+
+
+ echo Copying /conf contents to writable directory, to support Zookeeper dynamic reconfiguration
+ if [[ ! -d "$ZOOCFGDIR" ]]; then
+ mkdir $ZOOCFGDIR
+ cp -f /conf/zoo.cfg $ZOOCFGDIR
+ else
+ echo Copying the /conf/zoo.cfg contents except the dynamic config file during restart
+ echo -e "$( head -n -1 /conf/zoo.cfg )""\n""$( tail -n 1 "$STATIC_CONFIG" )" > $STATIC_CONFIG
+ fi
+ cp -f /conf/log4j.properties $ZOOCFGDIR
+ cp -f /conf/log4j-quiet.properties $ZOOCFGDIR
+ cp -f /conf/logback.xml $ZOOCFGDIR
+ cp -f /conf/env.sh $ZOOCFGDIR
+ cp -f /conf/java.env $ZOOCFGDIR
+
+ if [ -f $DYNCONFIG ]; then
+ # Node registered, start server
+ echo Starting zookeeper service
+ zkServer.sh --config $ZOOCFGDIR start-foreground
+ else
+ echo "Node failed to register!"
+ exit 1
+ fi
+
+ zookeeperTeardown.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+
+ set -ex
+ exec > /proc/1/fd/1
+ exec 2> /proc/1/fd/2
+
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+
+ # Check to see if zookeeper service for this node is a participant
+ set +e
+ ZKURL=$(zkConnectionString)
+ set -e
+ MYID=`cat $MYID_FILE`
+ CLUSTER_SIZE=$(zkCli.sh -server ${ZKURL} config | grep -c -E '^server\.[0-9]+=')
+ echo "CLUSTER_SIZE=$CLUSTER_SIZE, MyId=$MYID"
+
+ # Optional desired ensemble size.
+ if [[ -f /conf/ensemble.env ]]; then
+ set +e
+ source /conf/ensemble.env
+ set -e
+ fi
+ DESIRED_ENSEMBLE_SIZE=${DESIRED_ENSEMBLE_SIZE:-3}
+ echo "DESIRED_ENSEMBLE_SIZE=$DESIRED_ENSEMBLE_SIZE"
+
+    # NEED_SCALE_DOWN can be set explicitly (to 1). Otherwise, it is set automatically
+ # when the desired ensemble size is smaller than the current cluster size.
+ if [[ "$DESIRED_ENSEMBLE_SIZE" -lt "$CLUSTER_SIZE" ]]; then
+ echo "$DESIRED_ENSEMBLE_SIZE < $CLUSTER_SIZE, setting NEED_SCALE_DOWN"
+ NEED_SCALE_DOWN=1
+ fi
+
+ if [[ "$MYID" -le "1" ]]; then
+ echo "For id=1 scale down is forbidden"
+ NEED_SCALE_DOWN=0
+ fi
+
+ if [[ "$NEED_SCALE_DOWN" == "1" ]]; then
+ echo "NEED_SCALE_DOWN=1"
+
+ set +e
+ zkCli.sh -server ${ZKURL} reconfig -remove $MYID
+ echo $?
+ set -e
+
+ # give some time to propagate the changes
+ sleep 2
+ fi
+
+ # Kill the primary process ourselves to circumvent the terminationGracePeriodSeconds
+ ps -ef | grep zoo.cfg | grep -v grep | awk '{print $2}' | xargs kill
+
+
+ zookeeperLive.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ source /conf/env.sh
+ OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;)
+ # Check to see if zookeeper service answers
+ if [[ "$OK" == "imok" ]]; then
+ exit 0
+ else
+ exit 1
+ fi
+
+ zookeeperReady.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ HOST=`hostname -s`
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+ LOG4J_CONF=/conf/log4j-quiet.properties
+ STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg
+
+ OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;)
+
+ # Check to see if zookeeper service answers
+ if [[ "$OK" == "imok" ]]; then
+ set +e
+ getent hosts $DOMAIN
+ if [[ $? -ne 0 ]]; then
+ set -e
+ echo "There is no active ensemble, skipping readiness probe..."
+ exit 0
+ else
+ set -e
+ # An ensemble exists, check to see if this node is already a member.
+ # Check to see if zookeeper service for this node is a participant
+ set +e
+        # Extract the resource name and this member's ordinal value from the pod hostname
+ HOST=`hostname -s`
+ if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
+ NAME=${BASH_REMATCH[1]}
+ ORD=${BASH_REMATCH[2]}
+ else
+ echo Failed to parse name and ordinal of Pod
+ exit 1
+ fi
+ MYID=$((ORD+1))
+ ONDISK_CONFIG=false
+ if [ -f $MYID_FILE ]; then
+ EXISTING_ID="`cat $MYID_FILE`"
+ if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then
+ #If Id is correct and configuration is present under `/var/lib/zookeeper/conf`
+ ONDISK_CONFIG=true
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ SERVER_FOUND=`cat $DYN_CFG_FILE | grep -c "server.${MYID}="`
+ if [[ "$SERVER_FOUND" == "0" ]]; then
+ echo "Server not found in ensemble. Exiting ..."
+ exit 1
+ fi
+ SERVER=`cat $DYN_CFG_FILE | grep "server.${MYID}="`
+ if [[ "$SERVER" == *"participant"* ]]; then
+ ROLE=participant
+ elif [[ "$SERVER" == *"observer"* ]]; then
+ ROLE=observer
+ fi
+ fi
+ fi
+
+ if [[ "$ROLE" == "participant" ]]; then
+ echo "Zookeeper service is available and an active participant"
+ exit 0
+ elif [[ "$ROLE" == "observer" ]]; then
+ echo "Zookeeper service is ready to be upgraded from observer to participant."
+ ROLE=participant
+ ZKURL=$(zkConnectionString)
+ ZKCONFIG=$(zkConfig)
+
+ # Note: according to https://zookeeper.apache.org/doc/r3.9.3/zookeeperReconfig.html
+ # > One can modify any of the ports of a server, or its role (participant/observer) by adding it to the ensemble with different parameters.
+ # > This works in both the incremental and the bulk reconfiguration modes.
+ # > It is not necessary to remove the server and then add it back;
+ # > just specify the new parameters as if the server is not yet in the system.
+ # > The server will detect the configuration change and perform the necessary adjustments.
+
+ zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG"
+ exit 0
+ else
+ echo "Something has gone wrong. Unable to determine zookeeper role."
+ exit 1
+ fi
+ fi
+
+ else
+ echo "Zookeeper service is not available for requests"
+ exit 1
+ fi
+
+ ensemble.env: |
+    # DESIRED_ENSEMBLE_SIZE specifies the desired number of nodes in the ensemble.
+    # Set it to an integer >= 1. If unset, the scripts default to 3.
+ # Example:
+ DESIRED_ENSEMBLE_SIZE=1
+
+    # Optional NEED_SCALE_DOWN override. Allows forcing the scale-down logic during shutdown
+    # even if the cluster already has the desired size.
+ NEED_SCALE_DOWN=1
+
+---
+# Main StatefulSet
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper
+
+spec:
+# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention
+# requires alpha `StatefulSetAutoDeletePVC=true` feature gate
+# persistentVolumeClaimRetentionPolicy:
+# whenDeleted: Delete
+# whenScaled: Delete
+ podManagementPolicy: OrderedReady
+ replicas: 1
+ selector:
+ matchLabels:
+ app: zookeeper
+ serviceName: zookeeper-headless
+ template:
+ metadata:
+ generateName: zookeeper
+ labels:
+ app: zookeeper
+ what: node
+ annotations:
+ prometheus.io/port: '7000'
+ prometheus.io/scrape: 'true'
+ spec:
+# affinity:
+# podAntiAffinity:
+# requiredDuringSchedulingIgnoredDuringExecution:
+# - labelSelector:
+# matchExpressions:
+# - key: "app"
+# operator: In
+# values:
+# - zookeeper
+# # TODO think about multi-AZ EKS
+# # topologyKey: topology.kubernetes.io/zone
+# topologyKey: "kubernetes.io/hostname"
+# nodeAffinity:
+# preferredDuringSchedulingIgnoredDuringExecution:
+# - preference:
+# matchExpressions:
+# - key: topology.kubernetes.io/zone
+# operator: In
+#              values: [] # to be added programmatically
+# weight: 100
+ containers:
+ - name: zookeeper
+ command:
+ - /conf/zookeeperStart.sh
+ image: docker.io/zookeeper:3.8.4
+ imagePullPolicy: Always
+ lifecycle:
+ preStop:
+ exec:
+ command:
+ - /conf/zookeeperTeardown.sh
+ livenessProbe:
+ exec:
+ command:
+ - /conf/zookeeperLive.sh
+ failureThreshold: 3
+ initialDelaySeconds: 30
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 10
+ readinessProbe:
+ exec:
+ command:
+ - /conf/zookeeperReady.sh
+ failureThreshold: 3
+ initialDelaySeconds: 30
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 10
+ ports:
+ - containerPort: 2181
+ name: client
+ protocol: TCP
+ - containerPort: 2888
+ name: quorum
+ protocol: TCP
+ - containerPort: 3888
+ name: leader-election
+ protocol: TCP
+ - containerPort: 7000
+ name: metrics
+ protocol: TCP
+ - containerPort: 8080
+ name: admin-server
+ protocol: TCP
+ volumeMounts:
+ - mountPath: /var/lib/zookeeper
+ name: datadir-volume
+ - mountPath: /conf
+ name: conf
+ restartPolicy: Always
+ schedulerName: default-scheduler
+ terminationGracePeriodSeconds: 30
+ volumes:
+ - configMap:
+ name: zookeeper-scripts
+ defaultMode: 0755
+ name: conf
+ updateStrategy:
+ type: RollingUpdate
+ volumeClaimTemplates:
+ - metadata:
+ labels:
+ app: zookeeper
+ name: datadir-volume
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 1Gi
+ volumeMode: Filesystem
diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml
new file mode 100644
index 000000000..1c56abcdb
--- /dev/null
+++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml
@@ -0,0 +1,662 @@
+# The following manifests are based on https://github.com/pravega/zookeeper-operator/
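+# 3-node variant for tests; scale-down/teardown is handled by the preStop hook (zookeeperTeardown.sh below), hence "manual-teardown"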
+---
+# service for REST administration and reconfiguration
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-admin-server
+spec:
+ type: ClusterIP
+ ports:
+ - name: tcp-admin-server
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
+ selector:
+ app: zookeeper
+
+---
+# service for clickhouse zookeeper client connections
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper
+spec:
+ type: ClusterIP
+ ports:
+ - name: tcp-client
+ port: 2181
+ protocol: TCP
+ targetPort: 2181
+ selector:
+ app: zookeeper
+ what: node
+---
+# headless service for Zookeeper Quorum Election and service name
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-headless
+spec:
+ type: ClusterIP
+ clusterIP: None
+ publishNotReadyAddresses: false
+ ports:
+ - name: tcp-client
+ port: 2181
+ protocol: TCP
+ targetPort: 2181
+ - name: tcp-quorum
+ port: 2888
+ protocol: TCP
+ targetPort: 2888
+ - name: tcp-leader-election
+ port: 3888
+ protocol: TCP
+ targetPort: 3888
+ - name: tcp-metrics
+ port: 7000
+ protocol: TCP
+ targetPort: 7000
+ - name: tcp-admin-server
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
+ selector:
+ app: zookeeper
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ name: zookeeper
+spec:
+ selector:
+ matchLabels:
+ app: zookeeper
+ maxUnavailable: 1
+---
+# ConfigMap with common startup scripts and base config
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper-scripts
+data:
+ env.sh: |
+ #!/usr/bin/env bash
+ export DOMAIN=`hostname -d`
+ export QUORUM_PORT=2888
+ export LEADER_PORT=3888
+ export CLIENT_HOST=zookeeper
+ export CLIENT_PORT=2181
+ export ADMIN_SERVER_HOST=zookeeper-admin-server
+ export ADMIN_SERVER_PORT=8080
+ export CLUSTER_NAME=zookeeper
+ export ZOO_LOG4J_PROP="WARN, CONSOLE"
+ java.env: |
+ JVMFLAGS="-Xms128M -Xmx4G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50"
+ log4j-quiet.properties: |
+ log4j.rootLogger=CONSOLE
+ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+ log4j.appender.CONSOLE.Threshold=ERROR
+ log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+ log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+ log4j.properties: |
+ zookeeper.root.logger=CONSOLE
+ zookeeper.console.threshold=INFO
+ log4j.rootLogger=${zookeeper.root.logger}
+ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+ log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
+ log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+ log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
+  logback.xml: |
+    <configuration>
+      <property name="zookeeper.console.threshold" value="INFO" />
+      <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+          <pattern>%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n</pattern>
+        </encoder>
+        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+          <level>${zookeeper.console.threshold}</level>
+        </filter>
+      </appender>
+      <root level="INFO">
+        <appender-ref ref="CONSOLE" />
+      </root>
+    </configuration>
+ zoo.cfg: |
+ 4lw.commands.whitelist=*
+ dataDir=/var/lib/zookeeper/data
+ dataLogDir=/var/lib/zookeeper/datalog
+ standaloneEnabled=false
+ reconfigEnabled=true
+ skipACL=yes
+ metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider
+ metricsProvider.httpPort=7000
+ metricsProvider.exportJvmInfo=true
+ tickTime=2000
+ initLimit=300
+ syncLimit=10
+ maxClientCnxns=2000
+ maxTimeToWaitForEpoch=2000
+ globalOutstandingLimit=1000
+ preAllocSize=131072
+ snapCount=1500000
+ commitLogCount=500
+ snapSizeLimitInKb=4194304
+ maxCnxns=0
+ minSessionTimeout=4000
+ maxSessionTimeout=40000
+ autopurge.snapRetainCount=3
+ autopurge.purgeInterval=1
+ quorumListenOnAllIPs=false
+ admin.serverPort=8080
+ dynamicConfigFile=/var/lib/zookeeper/zoo.cfg.dynamic
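+    # reconfigEnabled and dynamicConfigFile above let zookeeperStart.sh / zookeeperTeardown.sh manage ensemble membership at runtime via "reconfig"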
+ zookeeperFunctions.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ function zkConfig() {
+ echo "$HOST.$DOMAIN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT"
+ }
+ function zkConnectionString() {
+ # If the client service address is not yet available, then return localhost
+ set +e
+ getent hosts "${CLIENT_HOST}" 2>/dev/null 1>/dev/null
+ if [[ $? -ne 0 ]]; then
+ set -e
+ echo "localhost:${CLIENT_PORT}"
+ else
+ set -e
+ echo "${CLIENT_HOST}:${CLIENT_PORT}"
+ fi
+ }
+ zookeeperStart.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+
+ set -ex
+ # TODO think how to add nslookup to docker image
+ # apt-get update && apt-get install --no-install-recommends -y dnsutils procps
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ HOST=`hostname -s`
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+ LOG4J_CONF=/conf/log4j-quiet.properties
+ DYNCONFIG=$VOLUME_DIR/zoo.cfg.dynamic
+ STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg
+
+ if [[ ! -d "$VOLUME_DIR/data" ]]; then
+ mkdir -p $VOLUME_DIR/data
+ chown zookeeper $VOLUME_DIR/data
+ fi
+ if [[ ! -d "$VOLUME_DIR/datalog" ]]; then
+ mkdir -p $VOLUME_DIR/datalog
+ chown zookeeper $VOLUME_DIR/datalog
+ fi
+
+    # Extract the resource name and this member's ordinal value from the pod hostname
+ if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
+ NAME=${BASH_REMATCH[1]}
+ ORD=${BASH_REMATCH[2]}
+ else
+ echo Failed to parse name and ordinal of Pod
+ exit 1
+ fi
+
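+    # ZooKeeper server ids are 1-based, while StatefulSet pod ordinals start at 0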
+ MYID=$((ORD+1))
+
+ # Values for first startup
+ WRITE_CONFIGURATION=true
+ REGISTER_NODE=true
+ ONDISK_MYID_CONFIG=false
+ ONDISK_DYN_CONFIG=false
+
+ # Check validity of on-disk configuration
+ if [ -f $MYID_FILE ]; then
+ EXISTING_ID="`cat $MYID_FILE`"
+ if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then
+ # If Id is correct and configuration is present under `/var/lib/zookeeper/conf`
+ ONDISK_MYID_CONFIG=true
+ fi
+ fi
+
+ if [ -f $DYNCONFIG ]; then
+ ONDISK_DYN_CONFIG=true
+ fi
+
+ # Determine if there is an ensemble available to join by checking the service domain
+ set +e
+ getent hosts $DOMAIN # This only performs a dns lookup
+ if [[ $? -eq 0 ]]; then
+ ACTIVE_ENSEMBLE=true
+ else
+ ACTIVE_ENSEMBLE=false
+ fi
+ # elif nslookup $DOMAIN | grep -q "server can't find $DOMAIN"; then
+ # echo "there is no active ensemble"
+ # ACTIVE_ENSEMBLE=false
+ # else
+ # If an nslookup of the headless service domain fails, then there is no
+ # active ensemble yet, but in certain cases nslookup of headless service
+ # takes a while to come up even if there is active ensemble
+ # ACTIVE_ENSEMBLE=false
+ # declare -i count=10
+ # while [[ $count -ge 0 ]]
+ # do
+ # sleep 1
+ # ((count=count-1))
+ # getent hosts $DOMAIN
+ # if [[ $? -eq 0 ]]; then
+ # ACTIVE_ENSEMBLE=true
+ # break
+ # fi
+ # done
+ # fi
+
+ if [[ "$ONDISK_MYID_CONFIG" == true && "$ONDISK_DYN_CONFIG" == true ]]; then
+      # If configuration is present, we assume there is no need to write it again.
+ WRITE_CONFIGURATION=false
+ else
+ WRITE_CONFIGURATION=true
+ fi
+
+ if [[ "$ACTIVE_ENSEMBLE" == false ]]; then
+      # This is the first node being added to the cluster, or the headless service is not yet available
+ REGISTER_NODE=false
+ else
+ # If ensemble exists, check to see if this node is already a member.
+ if [[ "$ONDISK_MYID_CONFIG" == false || "$ONDISK_DYN_CONFIG" == false ]]; then
+ REGISTER_NODE=true
+ elif [[ -f "$STATIC_CONFIG" ]]; then
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ if [[ "0" == $(grep -c "server.${MYID}=" "${DYN_CFG_FILE}") ]]; then
+ REGISTER_NODE=true
+ else
+ REGISTER_NODE=false
+ fi
+ else
+ REGISTER_NODE=false
+ fi
+ fi
+
+ if [[ "$WRITE_CONFIGURATION" == true ]]; then
+ echo "Writing myid: $MYID to: $MYID_FILE."
+ echo $MYID > $MYID_FILE
+ if [[ $MYID -eq 1 ]]; then
+ ROLE=participant
+ echo Initial initialization of ordinal 0 pod, creating new config.
+ ZKCONFIG=$(zkConfig)
+ echo Writing bootstrap configuration with the following config:
+ echo $ZKCONFIG
+ echo $MYID > $MYID_FILE
+ echo "server.${MYID}=${ZKCONFIG}" > $DYNCONFIG
+ fi
+ fi
+
+ export ZOOCFGDIR=/var/lib/zookeeper/conf
+ if [[ "$REGISTER_NODE" == true ]]; then
+ ROLE=observer
+ ZKURL=$(zkConnectionString)
+ ZKCONFIG=$(zkConfig)
+
+ set -e
+ echo Registering node and writing local configuration to disk.
+ zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" | grep -E '^server\.[0-9]+=' > $DYNCONFIG
+ if ls $ZOOCFGDIR/zoo.cfg.dynamic.* 1> /dev/null 2>&1; then
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ cp -fv $DYNCONFIG $DYN_CFG_FILE
+ fi
+ set +e
+ fi
+
+
+ echo Copying /conf contents to writable directory, to support Zookeeper dynamic reconfiguration
+ if [[ ! -d "$ZOOCFGDIR" ]]; then
+ mkdir $ZOOCFGDIR
+ cp -f /conf/zoo.cfg $ZOOCFGDIR
+ else
+ echo Copying the /conf/zoo.cfg contents except the dynamic config file during restart
+ echo -e "$( head -n -1 /conf/zoo.cfg )""\n""$( tail -n 1 "$STATIC_CONFIG" )" > $STATIC_CONFIG
+ fi
+ cp -f /conf/log4j.properties $ZOOCFGDIR
+ cp -f /conf/log4j-quiet.properties $ZOOCFGDIR
+ cp -f /conf/logback.xml $ZOOCFGDIR
+ cp -f /conf/env.sh $ZOOCFGDIR
+ cp -f /conf/java.env $ZOOCFGDIR
+
+ if [ -f $DYNCONFIG ]; then
+ # Node registered, start server
+ echo Starting zookeeper service
+ zkServer.sh --config $ZOOCFGDIR start-foreground
+ else
+ echo "Node failed to register!"
+ exit 1
+ fi
+
+ zookeeperTeardown.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+
+ set -ex
+ exec > /proc/1/fd/1
+ exec 2> /proc/1/fd/2
+
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+
+ # Check to see if zookeeper service for this node is a participant
+ set +e
+ ZKURL=$(zkConnectionString)
+ set -e
+ MYID=`cat $MYID_FILE`
+ CLUSTER_SIZE=$(zkCli.sh -server ${ZKURL} config | grep -c -E '^server\.[0-9]+=')
+ echo "CLUSTER_SIZE=$CLUSTER_SIZE, MyId=$MYID"
+
+ # Optional desired ensemble size.
+ if [[ -f /conf/ensemble.env ]]; then
+ set +e
+ source /conf/ensemble.env
+ set -e
+ fi
+ DESIRED_ENSEMBLE_SIZE=${DESIRED_ENSEMBLE_SIZE:-3}
+ echo "DESIRED_ENSEMBLE_SIZE=$DESIRED_ENSEMBLE_SIZE"
+
+    # NEED_SCALE_DOWN can be set explicitly (to 1). Otherwise, it is set automatically
+ # when the desired ensemble size is smaller than the current cluster size.
+ if [[ "$DESIRED_ENSEMBLE_SIZE" -lt "$CLUSTER_SIZE" ]]; then
+ echo "$DESIRED_ENSEMBLE_SIZE < $CLUSTER_SIZE, setting NEED_SCALE_DOWN"
+ NEED_SCALE_DOWN=1
+ fi
+
+ if [[ "$MYID" -le "1" ]]; then
+ echo "For id=1 scale down is forbidden"
+ NEED_SCALE_DOWN=0
+ fi
+
+ if [[ "$NEED_SCALE_DOWN" == "1" ]]; then
+ echo "NEED_SCALE_DOWN=1"
+
+ set +e
+ zkCli.sh -server ${ZKURL} reconfig -remove $MYID
+ echo $?
+ set -e
+
+ # give some time to propagate the changes
+ sleep 2
+ fi
+
+ # Kill the primary process ourselves to circumvent the terminationGracePeriodSeconds
+ ps -ef | grep zoo.cfg | grep -v grep | awk '{print $2}' | xargs kill
+
+
+ zookeeperLive.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ source /conf/env.sh
+ OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;)
+ # Check to see if zookeeper service answers
+ if [[ "$OK" == "imok" ]]; then
+ exit 0
+ else
+ exit 1
+ fi
+
+ zookeeperReady.sh: |
+ #!/usr/bin/env bash
+ # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # https://www.apache.org/licenses/LICENSE-2.0
+ set -ex
+ source /conf/env.sh
+ source /conf/zookeeperFunctions.sh
+
+ HOST=`hostname -s`
+ VOLUME_DIR=/var/lib/zookeeper
+ MYID_FILE=$VOLUME_DIR/data/myid
+ LOG4J_CONF=/conf/log4j-quiet.properties
+ STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg
+
+ OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;)
+
+ # Check to see if zookeeper service answers
+ if [[ "$OK" == "imok" ]]; then
+ set +e
+ getent hosts $DOMAIN
+ if [[ $? -ne 0 ]]; then
+ set -e
+ echo "There is no active ensemble, skipping readiness probe..."
+ exit 0
+ else
+ set -e
+ # An ensemble exists, check to see if this node is already a member.
+ # Check to see if zookeeper service for this node is a participant
+ set +e
+        # Extract the resource name and this member's ordinal value from the pod hostname
+ HOST=`hostname -s`
+ if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then
+ NAME=${BASH_REMATCH[1]}
+ ORD=${BASH_REMATCH[2]}
+ else
+ echo Failed to parse name and ordinal of Pod
+ exit 1
+ fi
+ MYID=$((ORD+1))
+ ONDISK_CONFIG=false
+ if [ -f $MYID_FILE ]; then
+ EXISTING_ID="`cat $MYID_FILE`"
+ if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then
+ #If Id is correct and configuration is present under `/var/lib/zookeeper/conf`
+ ONDISK_CONFIG=true
+ DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="`
+ DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=}
+ SERVER_FOUND=`cat $DYN_CFG_FILE | grep -c "server.${MYID}="`
+ if [[ "$SERVER_FOUND" == "0" ]]; then
+ echo "Server not found in ensemble. Exiting ..."
+ exit 1
+ fi
+ SERVER=`cat $DYN_CFG_FILE | grep "server.${MYID}="`
+ if [[ "$SERVER" == *"participant"* ]]; then
+ ROLE=participant
+ elif [[ "$SERVER" == *"observer"* ]]; then
+ ROLE=observer
+ fi
+ fi
+ fi
+
+ if [[ "$ROLE" == "participant" ]]; then
+ echo "Zookeeper service is available and an active participant"
+ exit 0
+ elif [[ "$ROLE" == "observer" ]]; then
+ echo "Zookeeper service is ready to be upgraded from observer to participant."
+ ROLE=participant
+ ZKURL=$(zkConnectionString)
+ ZKCONFIG=$(zkConfig)
+
+ # Note: according to https://zookeeper.apache.org/doc/r3.9.3/zookeeperReconfig.html
+ # > One can modify any of the ports of a server, or its role (participant/observer) by adding it to the ensemble with different parameters.
+ # > This works in both the incremental and the bulk reconfiguration modes.
+ # > It is not necessary to remove the server and then add it back;
+ # > just specify the new parameters as if the server is not yet in the system.
+ # > The server will detect the configuration change and perform the necessary adjustments.
+
+ zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG"
+ exit 0
+ else
+ echo "Something has gone wrong. Unable to determine zookeeper role."
+ exit 1
+ fi
+ fi
+
+ else
+ echo "Zookeeper service is not available for requests"
+ exit 1
+ fi
+
+ ensemble.env: |
+    # DESIRED_ENSEMBLE_SIZE specifies the desired number of nodes in the ensemble.
+    # Set it to an integer >= 1. If unset, the scripts default to 3.
+ # Example:
+ DESIRED_ENSEMBLE_SIZE=3
+
+    # Optional NEED_SCALE_DOWN override. Allows forcing the scale-down logic during shutdown
+    # even if the cluster already has the desired size.
+ NEED_SCALE_DOWN=1
+
+---
+# Main StatefulSet
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ labels:
+ app: zookeeper
+ name: zookeeper
+
+spec:
+# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention
+# requires alpha `StatefulSetAutoDeletePVC=true` feature gate
+# persistentVolumeClaimRetentionPolicy:
+# whenDeleted: Delete
+# whenScaled: Delete
+ podManagementPolicy: OrderedReady
+ replicas: 3
+ selector:
+ matchLabels:
+ app: zookeeper
+ serviceName: zookeeper-headless
+ template:
+ metadata:
+ generateName: zookeeper
+ labels:
+ app: zookeeper
+ what: node
+ annotations:
+ prometheus.io/port: '7000'
+ prometheus.io/scrape: 'true'
+ spec:
+# affinity:
+# podAntiAffinity:
+# requiredDuringSchedulingIgnoredDuringExecution:
+# - labelSelector:
+# matchExpressions:
+# - key: "app"
+# operator: In
+# values:
+# - zookeeper
+# # TODO think about multi-AZ EKS
+# # topologyKey: topology.kubernetes.io/zone
+# topologyKey: "kubernetes.io/hostname"
+# nodeAffinity:
+# preferredDuringSchedulingIgnoredDuringExecution:
+# - preference:
+# matchExpressions:
+# - key: topology.kubernetes.io/zone
+# operator: In
+#              values: [] # to be added programmatically
+# weight: 100
+ containers:
+ - name: zookeeper
+ command:
+ - /conf/zookeeperStart.sh
+ image: docker.io/zookeeper:3.8.4
+ imagePullPolicy: Always
+ lifecycle:
+ preStop:
+ exec:
+ command:
+ - /conf/zookeeperTeardown.sh
+ livenessProbe:
+ exec:
+ command:
+ - /conf/zookeeperLive.sh
+ failureThreshold: 3
+ initialDelaySeconds: 30
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 10
+ readinessProbe:
+ exec:
+ command:
+ - /conf/zookeeperReady.sh
+ failureThreshold: 3
+ initialDelaySeconds: 30
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 10
+ ports:
+ - containerPort: 2181
+ name: client
+ protocol: TCP
+ - containerPort: 2888
+ name: quorum
+ protocol: TCP
+ - containerPort: 3888
+ name: leader-election
+ protocol: TCP
+ - containerPort: 7000
+ name: metrics
+ protocol: TCP
+ - containerPort: 8080
+ name: admin-server
+ protocol: TCP
+ volumeMounts:
+ - mountPath: /var/lib/zookeeper
+ name: datadir-volume
+ - mountPath: /conf
+ name: conf
+ restartPolicy: Always
+ schedulerName: default-scheduler
+ terminationGracePeriodSeconds: 30
+ volumes:
+ - configMap:
+ name: zookeeper-scripts
+ defaultMode: 0755
+ name: conf
+ updateStrategy:
+ type: RollingUpdate
+ volumeClaimTemplates:
+ - metadata:
+ labels:
+ app: zookeeper
+ name: datadir-volume
+ spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 1Gi
+ volumeMode: Filesystem
diff --git a/tests/e2e/test_keeper.py b/tests/e2e/test_keeper.py
index ac31ea246..71d3251a2 100644
--- a/tests/e2e/test_keeper.py
+++ b/tests/e2e/test_keeper.py
@@ -1,4 +1,5 @@
-import time
+import os
+import time
+
+os.environ["TEST_NAMESPACE"] = "test-keeper"
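+# NOTE: TEST_NAMESPACE is set before the e2e imports below, since the e2e settings are expected to read it at import time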
import e2e.clickhouse as clickhouse
import e2e.kubectl as kubectl
@@ -59,7 +60,7 @@ def insert_replicated_data(chi, pod_for_insert_data, create_tables, insert_table
)
-def check_zk_root_znode(chi, keeper_type, pod_count, retry_count=15):
+def check_zk_root_znode(chi, keeper_type, pod_count, ns, retry_count=15):
for pod_num in range(pod_count):
found = False
for i in range(retry_count):
@@ -82,7 +83,7 @@ def check_zk_root_znode(chi, keeper_type, pod_count, retry_count=15):
out = kubectl.launch(
f"exec {pod_prefix}-{pod_num} -- bash -ce '{keeper_cmd}'",
- ns=settings.test_namespace,
+ ns=ns,
ok_to_fail=True,
)
found = False
@@ -144,17 +145,17 @@ def rescale_zk_and_clickhouse(
return chi
-def delete_keeper_pvc(keeper_type):
+def delete_keeper_pvc(keeper_type, ns):
pvc_list = kubectl.get(
kind="pvc",
name="",
label=f"-l app={keeper_type}",
- ns=settings.test_namespace,
+ ns=ns,
ok_to_fail=False,
)
for pvc in pvc_list["items"]:
if pvc["metadata"]["name"][-2:] != "-0":
- kubectl.launch(f"delete pvc {pvc['metadata']['name']}", ns=settings.test_namespace)
+ kubectl.launch(f"delete pvc {pvc['metadata']['name']}", ns=ns)
def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper_type, keeper_manifest_1_node,
@@ -167,6 +168,8 @@ def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper
keeper_manifest = f"../../deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/{keeper_manifest}"
if keeper_type == "clickhouse-keeper":
keeper_manifest = f"../../deploy/clickhouse-keeper/clickhouse-keeper-manually/{keeper_manifest}"
+ if keeper_type == "clickhouse-keeper_with_chk":
+ keeper_manifest = f"../../deploy/clickhouse-keeper/clickhouse-keeper-with-CHK-resource/{keeper_manifest}"
if keeper_type == "zookeeper-operator":
keeper_manifest = f"../../deploy/zookeeper/zookeeper-with-zookeeper-operator/{keeper_manifest}"
@@ -189,14 +192,16 @@ def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper
f"Pods expected={keeper_replica_count} actual={pod_counts}, wait {3*(i+1)} seconds"):
time.sleep(3*(i+1))
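+# Scaling a StatefulSet down does not delete its PVCs, so the zookeeper "scaleout-pvc" and "manual-teardown" manifests need explicit PVC cleanup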
+def is_need_delete_keeper_pvc(keeper_type, keeper_manifest_1_node):
+ return keeper_type == "zookeeper" and ("scaleout-pvc" in keeper_manifest_1_node or "manual-teardown" in keeper_manifest_1_node)
@TestOutline
def test_keeper_rescale_outline(
self,
- keeper_type="zookeeper",
- pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
- keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only.yaml",
- keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only.yaml",
+ keeper_type,
+ pod_for_insert_data,
+ keeper_manifest_1_node,
+ keeper_manifest_3_node,
):
"""
test scenario for Zoo/Clickhouse Keeper
@@ -209,8 +214,8 @@ def test_keeper_rescale_outline(
"""
with When("Clean exists ClickHouse Keeper and ZooKeeper"):
- kubectl.delete_all_keeper(settings.test_namespace)
- kubectl.delete_all_chi(settings.test_namespace)
+ kubectl.delete_all_keeper(self.context.test_namespace)
+ kubectl.delete_all_chi(self.context.test_namespace)
with When("Install CH 1 node ZK 1 node"):
chi = rescale_zk_and_clickhouse(
@@ -223,7 +228,7 @@ def test_keeper_rescale_outline(
)
util.wait_clickhouse_cluster_ready(chi)
wait_keeper_ready(keeper_type=keeper_type, pod_count=1)
- check_zk_root_znode(chi, keeper_type, pod_count=1)
+ check_zk_root_znode(chi, keeper_type, pod_count=1, ns=self.context.test_namespace)
util.wait_clickhouse_no_readonly_replicas(chi)
insert_replicated_data(
chi,
@@ -244,7 +249,7 @@ def test_keeper_rescale_outline(
keeper_manifest_3_node=keeper_manifest_3_node,
)
wait_keeper_ready(keeper_type=keeper_type, pod_count=3)
- check_zk_root_znode(chi, keeper_type, pod_count=3)
+ check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace)
util.wait_clickhouse_cluster_ready(chi)
util.wait_clickhouse_no_readonly_replicas(chi)
@@ -264,9 +269,9 @@ def test_keeper_rescale_outline(
keeper_manifest_3_node=keeper_manifest_3_node,
)
wait_keeper_ready(keeper_type=keeper_type, pod_count=1)
- check_zk_root_znode(chi, keeper_type, pod_count=1)
+ check_zk_root_znode(chi, keeper_type, pod_count=1, ns=self.context.test_namespace)
if keeper_type == "zookeeper" and "scaleout-pvc" in keeper_manifest_1_node:
- delete_keeper_pvc(keeper_type=keeper_type)
+ delete_keeper_pvc(keeper_type=keeper_type, ns=self.context.test_namespace)
util.wait_clickhouse_cluster_ready(chi)
util.wait_clickhouse_no_readonly_replicas(chi)
@@ -285,7 +290,7 @@ def test_keeper_rescale_outline(
keeper_manifest_1_node=keeper_manifest_1_node,
keeper_manifest_3_node=keeper_manifest_3_node,
)
- check_zk_root_znode(chi, keeper_type, pod_count=3)
+ check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace)
for keeper_replica_count in [1, 3]:
with When("Stop CH + ZK"):
@@ -308,7 +313,7 @@ def test_keeper_rescale_outline(
)
with Then("check data in tables"):
- check_zk_root_znode(chi, keeper_type, pod_count=3)
+ check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace)
util.wait_clickhouse_cluster_ready(chi)
util.wait_clickhouse_no_readonly_replicas(chi)
for table_name, exptected_rows in {
@@ -364,38 +369,48 @@ def test_clickhouse_keeper_rescale_chk(self):
)
-@TestScenario
-@Name("test_zookeeper_operator_rescale. Check Zookeeper OPERATOR scale-up / scale-down cases")
-def test_zookeeper_operator_rescale(self):
- test_keeper_rescale_outline(
- keeper_type="zookeeper-operator",
- pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
- keeper_manifest_1_node="zookeeper-operator-1-node.yaml",
- keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml",
- )
+# @TestScenario
+# @Name("test_zookeeper_operator_rescale. Check Zookeeper OPERATOR scale-up / scale-down cases")
+# def test_zookeeper_operator_rescale(self):
+# test_keeper_rescale_outline(
+# keeper_type="zookeeper-operator",
+# pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
+# keeper_manifest_1_node="zookeeper-operator-1-node.yaml",
+# keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml",
+# )
+
+
+# @TestScenario
+# @Name("test_zookeeper_pvc_scaleout_rescale. Check ZK+PVC scale-up / scale-down cases")
+# def test_zookeeper_pvc_scaleout_rescale(self):
+# test_keeper_rescale_outline(
+# keeper_type="zookeeper",
+# pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
+# keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml",
+# keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml",
+# )
@TestScenario
-@Name("test_zookeeper_pvc_scaleout_rescale. Check ZK+PVC scale-up / scale-down cases")
-def test_zookeeper_pvc_scaleout_rescale(self):
+@Name("test_zookeeper_manual_teardown_rescale. Check ZK+Manual TEARDOWN scale-up / scale-down cases")
+def test_zookeeper_manual_teardown_rescale(self):
test_keeper_rescale_outline(
keeper_type="zookeeper",
pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
- keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml",
- keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml",
+ keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml",
+ keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml",
)
-
@TestOutline
def test_keeper_probes_outline(
self,
- keeper_type="zookeeper",
- keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only.yaml",
- keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only.yaml",
+ keeper_type,
+ keeper_manifest_1_node,
+ keeper_manifest_3_node,
):
with When("Clean exists ClickHouse Keeper and ZooKeeper"):
- kubectl.delete_all_chi(settings.test_namespace)
- kubectl.delete_all_keeper(settings.test_namespace)
+ kubectl.delete_all_chi(self.context.test_namespace)
+ kubectl.delete_all_keeper(self.context.test_namespace)
with Then("Install CH 2 node ZK 3 node"):
chi = rescale_zk_and_clickhouse(
@@ -409,7 +424,7 @@ def test_keeper_probes_outline(
)
util.wait_clickhouse_cluster_ready(chi)
wait_keeper_ready(keeper_type=keeper_type, pod_count=3)
- check_zk_root_znode(chi, keeper_type, pod_count=3)
+ check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace)
util.wait_clickhouse_no_readonly_replicas(chi)
with Then("Create keeper_bench table"):
@@ -430,7 +445,7 @@ def test_keeper_probes_outline(
max_parts_in_total=1000000;
""",
)
- with Then("Insert data to keeper_bench for make zookeeper workload"):
+ with Then("Insert data to keeper_bench for make keeper workload"):
pod_prefix = "chi-test-cluster-for-zk-default"
rows = 100000
for pod in ("0-0-0", "0-1-0"):
@@ -476,34 +491,34 @@ def test_zookeeper_probes_workload(self):
)
-@TestScenario
-@Name(
- "test_zookeeper_pvc_probes_workload. Liveness + Readiness probes shall works fine "
- "under workload in multi-datacenter installation"
-)
-def test_zookeeper_pvc_probes_workload(self):
- test_keeper_probes_outline(
- keeper_type="zookeeper",
- keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml",
- keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml",
- )
-
-
-@TestScenario
-@Name(
- "test_zookeeper_operator_probes_workload. Liveness + Readiness probes shall works fine "
- "under workload in multi-datacenter installation"
-)
-def test_zookeeper_operator_probes_workload(self):
- test_keeper_probes_outline(
- keeper_type="zookeeper-operator",
- keeper_manifest_1_node="zookeeper-operator-1-node.yaml",
- keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml",
-
- # uncomment only if you know how to use it
- # keeper_manifest_1_node='zookeeper-operator-1-node-with-custom-probes.yaml',
- # keeper_manifest_3_node='zookeeper-operator-3-nodes-with-custom-probes.yaml',
- )
+# @TestScenario
+# @Name(
+# "test_zookeeper_pvc_probes_workload. Liveness + Readiness probes shall works fine "
+# "under workload in multi-datacenter installation"
+# )
+# def test_zookeeper_pvc_probes_workload(self):
+# test_keeper_probes_outline(
+# keeper_type="zookeeper",
+# keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml",
+# keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml",
+# )
+
+
+# @TestScenario
+# @Name(
+# "test_zookeeper_operator_probes_workload. Liveness + Readiness probes shall works fine "
+# "under workload in multi-datacenter installation"
+# )
+# def test_zookeeper_operator_probes_workload(self):
+# test_keeper_probes_outline(
+# keeper_type="zookeeper-operator",
+# keeper_manifest_1_node="zookeeper-operator-1-node.yaml",
+# keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml",
+#
+# # uncomment only if you know how to use it
+# # keeper_manifest_1_node='zookeeper-operator-1-node-with-custom-probes.yaml',
+# # keeper_manifest_3_node='zookeeper-operator-3-nodes-with-custom-probes.yaml',
+# )
@TestScenario
@@ -521,7 +536,7 @@ def test_clickhouse_keeper_probes_workload(self):
@TestScenario
@Name(
- "test_clickhouse_keeper_probes_workload_with_CHKI. Liveness + Readiness probes shall works fine "
+ "test_clickhouse_keeper_probes_workload_with_chk. Liveness + Readiness probes shall works fine "
"under workload in multi-datacenter installation"
)
@Requirements(RQ_SRS_026_ClickHouseOperator_CustomResource_Kind_ClickHouseKeeperInstallation("1.0"))
@@ -538,27 +553,31 @@ def test_clickhouse_keeper_probes_workload_with_chk(self):
def test(self):
with Given("set settings"):
set_settings()
- self.context.test_namespace = "test"
- self.context.operator_namespace = "test"
+ self.context.test_namespace = "test-keeper"
+ self.context.operator_namespace = "test-keeper"
with Given("I create shell"):
shell = get_shell()
self.context.shell = shell
+
+ util.clean_namespace(delete_chi=True, delete_keeper=True)
+ util.install_operator_if_not_exist()
+
all_tests = [
- test_zookeeper_operator_rescale,
+ # test_zookeeper_operator_rescale,
+ # test_zookeeper_pvc_scaleout_rescale,
test_clickhouse_keeper_rescale,
test_clickhouse_keeper_rescale_chk,
- test_zookeeper_pvc_scaleout_rescale,
+ test_zookeeper_manual_teardown_rescale,
test_zookeeper_rescale,
+ # test_zookeeper_pvc_probes_workload,
+ # test_zookeeper_operator_probes_workload,
test_zookeeper_probes_workload,
- test_zookeeper_pvc_probes_workload,
- test_zookeeper_operator_probes_workload,
test_clickhouse_keeper_probes_workload,
test_clickhouse_keeper_probes_workload_with_chk,
]
-
- util.clean_namespace(delete_chi=True, delete_keeper=True)
- util.install_operator_if_not_exist()
for t in all_tests:
Scenario(test=t)()
+
+ util.clean_namespace(delete_chi=True, delete_keeper=True)
diff --git a/tests/e2e/util.py b/tests/e2e/util.py
index 97aa98691..e99f05750 100644
--- a/tests/e2e/util.py
+++ b/tests/e2e/util.py
@@ -100,7 +100,7 @@ def require_keeper(keeper_manifest="", keeper_type=settings.keeper_type, force_i
if doc["kind"] in ("StatefulSet", "ZookeeperCluster"):
keeper_nodes = doc["spec"]["replicas"]
expected_docs = {
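+            # expected number of YAML documents in each keeper manifest (the new manual-teardown manifests contain 6 documents)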
- "zookeeper": 5 if "scaleout-pvc" in keeper_manifest else 4,
+ "zookeeper": 5 if "scaleout-pvc" in keeper_manifest else 6 if "manual-teardown" in keeper_manifest else 4,
"clickhouse-keeper": 7,
"chk": 2,
"zookeeper-operator": 3 if "probes" in keeper_manifest else 1,