diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index b148a7326..a1cd34ffb 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -68,6 +68,19 @@ jobs: minikube image load operator.tar minikube image load metrics-exporter.tar + - name: Deploy prometheus + run: | + cp ./deploy/prometheus/prometheus-sensitive-data.example.sh ./deploy/prometheus/prometheus-sensitive-data.sh + NO_WAIT=1 bash ./deploy/prometheus/create-prometheus.sh + + - name: Deploy minio + run: | + NO_WAIT=1 bash ./deploy/minio/create-minio.sh + + - name: Deploy zookeeper-operator + run: | + bash ./deploy/zookeeper/zookeeper-with-zookeeper-operator/install-zookeeper-operator.sh + - name: Run Tests id: run-tests continue-on-error: true @@ -95,7 +108,7 @@ jobs: test_mode="--test-to-end" fi - ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --native --log ./tests/raw.log + ~/venv/qa/bin/python3 ./tests/regression.py --only=/regression/e2e.test_operator/${ONLY} $test_mode --trim-results on -o short --no-colors --native --log ./tests/raw.log test_result=$? ~/venv/qa/bin/tfs --no-colors transform compact ./tests/raw.log ./tests/compact.log ~/venv/qa/bin/tfs --no-colors transform nice ./tests/raw.log ./tests/nice.log.txt @@ -123,7 +136,7 @@ jobs: retention-days: 90 - name: Test Failed - if: ${{ failure() }} + if: ${{ steps.run-tests.outputs.test_result != '0' }} uses: actions/github-script@v3 with: script: | diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml new file mode 100644 index 000000000..659e73d90 --- /dev/null +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml @@ -0,0 +1,662 @@ +# Following manifests based on https://github.com/pravega/zookeeper-operator/ +--- +# service for REST administration and reconfiguration +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper-admin-server +spec: + type: ClusterIP + ports: + - name: tcp-admin-server + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: zookeeper + +--- +# service for clickhouse zookeeper client connections +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper +spec: + type: ClusterIP + ports: + - name: tcp-client + port: 2181 + protocol: TCP + targetPort: 2181 + selector: + app: zookeeper + what: node +--- +# headless service for Zookeeper Quorum Election and service name +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper-headless +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: false + ports: + - name: tcp-client + port: 2181 + protocol: TCP + targetPort: 2181 + - name: tcp-quorum + port: 2888 + protocol: TCP + targetPort: 2888 + - name: tcp-leader-election + port: 3888 + protocol: TCP + targetPort: 3888 + - name: tcp-metrics + port: 7000 + protocol: TCP + targetPort: 7000 + - name: tcp-admin-server + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: zookeeper +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: zookeeper +spec: + selector: + matchLabels: + app: zookeeper + maxUnavailable: 1 +--- +# ConfigMap with common startup scripts and base config +apiVersion: v1 
+kind: ConfigMap +metadata: + labels: + app: zookeeper + name: zookeeper-scripts +data: + env.sh: | + #!/usr/bin/env bash + export DOMAIN=`hostname -d` + export QUORUM_PORT=2888 + export LEADER_PORT=3888 + export CLIENT_HOST=zookeeper + export CLIENT_PORT=2181 + export ADMIN_SERVER_HOST=zookeeper-admin-server + export ADMIN_SERVER_PORT=8080 + export CLUSTER_NAME=zookeeper + export ZOO_LOG4J_PROP="WARN, CONSOLE" + java.env: | + JVMFLAGS="-Xms128M -Xmx4G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50" + log4j-quiet.properties: | + log4j.rootLogger=CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=ERROR + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + log4j.properties: | + zookeeper.root.logger=CONSOLE + zookeeper.console.threshold=INFO + log4j.rootLogger=${zookeeper.root.logger} + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + logback.xml: | + + + + + %d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + + + ${zookeeper.console.threshold} + + + + + + + + zoo.cfg: | + 4lw.commands.whitelist=* + dataDir=/var/lib/zookeeper/data + dataLogDir=/var/lib/zookeeper/datalog + standaloneEnabled=false + reconfigEnabled=true + skipACL=yes + metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider + metricsProvider.httpPort=7000 + metricsProvider.exportJvmInfo=true + tickTime=2000 + initLimit=300 + syncLimit=10 + maxClientCnxns=2000 + maxTimeToWaitForEpoch=2000 + globalOutstandingLimit=1000 + preAllocSize=131072 + snapCount=1500000 + commitLogCount=500 + snapSizeLimitInKb=4194304 + maxCnxns=0 + minSessionTimeout=4000 + maxSessionTimeout=40000 + autopurge.snapRetainCount=3 + autopurge.purgeInterval=1 + quorumListenOnAllIPs=false + admin.serverPort=8080 + dynamicConfigFile=/var/lib/zookeeper/zoo.cfg.dynamic + zookeeperFunctions.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + function zkConfig() { + echo "$HOST.$DOMAIN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT" + } + function zkConnectionString() { + # If the client service address is not yet available, then return localhost + set +e + getent hosts "${CLIENT_HOST}" 2>/dev/null 1>/dev/null + if [[ $? -ne 0 ]]; then + set -e + echo "localhost:${CLIENT_PORT}" + else + set -e + echo "${CLIENT_HOST}:${CLIENT_PORT}" + fi + } + zookeeperStart.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. 
+ # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + + set -ex + # TODO think how to add nslookup to docker image + # apt-get update && apt-get install --no-install-recommends -y dnsutils procps + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + HOST=`hostname -s` + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + LOG4J_CONF=/conf/log4j-quiet.properties + DYNCONFIG=$VOLUME_DIR/zoo.cfg.dynamic + STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg + + if [[ ! -d "$VOLUME_DIR/data" ]]; then + mkdir -p $VOLUME_DIR/data + chown zookeeper $VOLUME_DIR/data + fi + if [[ ! -d "$VOLUME_DIR/datalog" ]]; then + mkdir -p $VOLUME_DIR/datalog + chown zookeeper $VOLUME_DIR/datalog + fi + + # Extract resource name and this members ordinal value from pod hostname + if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then + NAME=${BASH_REMATCH[1]} + ORD=${BASH_REMATCH[2]} + else + echo Failed to parse name and ordinal of Pod + exit 1 + fi + + MYID=$((ORD+1)) + + # Values for first startup + WRITE_CONFIGURATION=true + REGISTER_NODE=true + ONDISK_MYID_CONFIG=false + ONDISK_DYN_CONFIG=false + + # Check validity of on-disk configuration + if [ -f $MYID_FILE ]; then + EXISTING_ID="`cat $MYID_FILE`" + if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then + # If Id is correct and configuration is present under `/var/lib/zookeeper/conf` + ONDISK_MYID_CONFIG=true + fi + fi + + if [ -f $DYNCONFIG ]; then + ONDISK_DYN_CONFIG=true + fi + + # Determine if there is an ensemble available to join by checking the service domain + set +e + getent hosts $DOMAIN # This only performs a dns lookup + if [[ $? -eq 0 ]]; then + ACTIVE_ENSEMBLE=true + else + ACTIVE_ENSEMBLE=false + fi + # elif nslookup $DOMAIN | grep -q "server can't find $DOMAIN"; then + # echo "there is no active ensemble" + # ACTIVE_ENSEMBLE=false + # else + # If an nslookup of the headless service domain fails, then there is no + # active ensemble yet, but in certain cases nslookup of headless service + # takes a while to come up even if there is active ensemble + # ACTIVE_ENSEMBLE=false + # declare -i count=10 + # while [[ $count -ge 0 ]] + # do + # sleep 1 + # ((count=count-1)) + # getent hosts $DOMAIN + # if [[ $? -eq 0 ]]; then + # ACTIVE_ENSEMBLE=true + # break + # fi + # done + # fi + + if [[ "$ONDISK_MYID_CONFIG" == true && "$ONDISK_DYN_CONFIG" == true ]]; then + # If Configuration is present, we assume, there is no need to write configuration. + WRITE_CONFIGURATION=false + else + WRITE_CONFIGURATION=true + fi + + if [[ "$ACTIVE_ENSEMBLE" == false ]]; then + # This is the first node being added to the cluster or headless service not yet available + REGISTER_NODE=false + else + # If ensemble exists, check to see if this node is already a member. + if [[ "$ONDISK_MYID_CONFIG" == false || "$ONDISK_DYN_CONFIG" == false ]]; then + REGISTER_NODE=true + elif [[ -f "$STATIC_CONFIG" ]]; then + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + if [[ "0" == $(grep -c "server.${MYID}=" "${DYN_CFG_FILE}") ]]; then + REGISTER_NODE=true + else + REGISTER_NODE=false + fi + else + REGISTER_NODE=false + fi + fi + + if [[ "$WRITE_CONFIGURATION" == true ]]; then + echo "Writing myid: $MYID to: $MYID_FILE." + echo $MYID > $MYID_FILE + if [[ $MYID -eq 1 ]]; then + ROLE=participant + echo Initial initialization of ordinal 0 pod, creating new config. 
+ ZKCONFIG=$(zkConfig) + echo Writing bootstrap configuration with the following config: + echo $ZKCONFIG + echo $MYID > $MYID_FILE + echo "server.${MYID}=${ZKCONFIG}" > $DYNCONFIG + fi + fi + + export ZOOCFGDIR=/var/lib/zookeeper/conf + if [[ "$REGISTER_NODE" == true ]]; then + ROLE=observer + ZKURL=$(zkConnectionString) + ZKCONFIG=$(zkConfig) + + set -e + echo Registering node and writing local configuration to disk. + zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" | grep -E '^server\.[0-9]+=' > $DYNCONFIG + if ls $ZOOCFGDIR/zoo.cfg.dynamic.* 1> /dev/null 2>&1; then + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + cp -fv $DYNCONFIG $DYN_CFG_FILE + fi + set +e + fi + + + echo Copying /conf contents to writable directory, to support Zookeeper dynamic reconfiguration + if [[ ! -d "$ZOOCFGDIR" ]]; then + mkdir $ZOOCFGDIR + cp -f /conf/zoo.cfg $ZOOCFGDIR + else + echo Copying the /conf/zoo.cfg contents except the dynamic config file during restart + echo -e "$( head -n -1 /conf/zoo.cfg )""\n""$( tail -n 1 "$STATIC_CONFIG" )" > $STATIC_CONFIG + fi + cp -f /conf/log4j.properties $ZOOCFGDIR + cp -f /conf/log4j-quiet.properties $ZOOCFGDIR + cp -f /conf/logback.xml $ZOOCFGDIR + cp -f /conf/env.sh $ZOOCFGDIR + cp -f /conf/java.env $ZOOCFGDIR + + if [ -f $DYNCONFIG ]; then + # Node registered, start server + echo Starting zookeeper service + zkServer.sh --config $ZOOCFGDIR start-foreground + else + echo "Node failed to register!" + exit 1 + fi + + zookeeperTeardown.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + + set -ex + exec > /proc/1/fd/1 + exec 2> /proc/1/fd/2 + + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + + # Check to see if zookeeper service for this node is a participant + set +e + ZKURL=$(zkConnectionString) + set -e + MYID=`cat $MYID_FILE` + CLUSTER_SIZE=$(zkCli.sh -server ${ZKURL} config | grep -c -E '^server\.[0-9]+=') + echo "CLUSTER_SIZE=$CLUSTER_SIZE, MyId=$MYID" + + # Optional desired ensemble size. + if [[ -f /conf/ensemble.env ]]; then + set +e + source /conf/ensemble.env + set -e + fi + DESIRED_ENSEMBLE_SIZE=${DESIRED_ENSEMBLE_SIZE:-3} + echo "DESIRED_ENSEMBLE_SIZE=$DESIRED_ENSEMBLE_SIZE" + + # NEED_SCALE_DOWN can be set explicitly(to 1). Otherwise, set it automatically + # when the desired ensemble size is smaller than the current cluster size. + if [[ "$DESIRED_ENSEMBLE_SIZE" -lt "$CLUSTER_SIZE" ]]; then + echo "$DESIRED_ENSEMBLE_SIZE < $CLUSTER_SIZE, setting NEED_SCALE_DOWN" + NEED_SCALE_DOWN=1 + fi + + if [[ "$MYID" -le "1" ]]; then + echo "For id=1 scale down is forbidden" + NEED_SCALE_DOWN=0 + fi + + if [[ "$NEED_SCALE_DOWN" == "1" ]]; then + echo "NEED_SCALE_DOWN=1" + + set +e + zkCli.sh -server ${ZKURL} reconfig -remove $MYID + echo $? + set -e + + # give some time to propagate the changes + sleep 2 + fi + + # Kill the primary process ourselves to circumvent the terminationGracePeriodSeconds + ps -ef | grep zoo.cfg | grep -v grep | awk '{print $2}' | xargs kill + + + zookeeperLive.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
+ # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + source /conf/env.sh + OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;) + # Check to see if zookeeper service answers + if [[ "$OK" == "imok" ]]; then + exit 0 + else + exit 1 + fi + + zookeeperReady.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + HOST=`hostname -s` + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + LOG4J_CONF=/conf/log4j-quiet.properties + STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg + + OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;) + + # Check to see if zookeeper service answers + if [[ "$OK" == "imok" ]]; then + set +e + getent hosts $DOMAIN + if [[ $? -ne 0 ]]; then + set -e + echo "There is no active ensemble, skipping readiness probe..." + exit 0 + else + set -e + # An ensemble exists, check to see if this node is already a member. + # Check to see if zookeeper service for this node is a participant + set +e + # Extract resource name and this members' ordinal value from pod hostname + HOST=`hostname -s` + if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then + NAME=${BASH_REMATCH[1]} + ORD=${BASH_REMATCH[2]} + else + echo Failed to parse name and ordinal of Pod + exit 1 + fi + MYID=$((ORD+1)) + ONDISK_CONFIG=false + if [ -f $MYID_FILE ]; then + EXISTING_ID="`cat $MYID_FILE`" + if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then + #If Id is correct and configuration is present under `/var/lib/zookeeper/conf` + ONDISK_CONFIG=true + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + SERVER_FOUND=`cat $DYN_CFG_FILE | grep -c "server.${MYID}="` + if [[ "$SERVER_FOUND" == "0" ]]; then + echo "Server not found in ensemble. Exiting ..." + exit 1 + fi + SERVER=`cat $DYN_CFG_FILE | grep "server.${MYID}="` + if [[ "$SERVER" == *"participant"* ]]; then + ROLE=participant + elif [[ "$SERVER" == *"observer"* ]]; then + ROLE=observer + fi + fi + fi + + if [[ "$ROLE" == "participant" ]]; then + echo "Zookeeper service is available and an active participant" + exit 0 + elif [[ "$ROLE" == "observer" ]]; then + echo "Zookeeper service is ready to be upgraded from observer to participant." + ROLE=participant + ZKURL=$(zkConnectionString) + ZKCONFIG=$(zkConfig) + + # Note: according to https://zookeeper.apache.org/doc/r3.9.3/zookeeperReconfig.html + # > One can modify any of the ports of a server, or its role (participant/observer) by adding it to the ensemble with different parameters. + # > This works in both the incremental and the bulk reconfiguration modes. + # > It is not necessary to remove the server and then add it back; + # > just specify the new parameters as if the server is not yet in the system. + # > The server will detect the configuration change and perform the necessary adjustments. + + zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" + exit 0 + else + echo "Something has gone wrong. 
Unable to determine zookeeper role." + exit 1 + fi + fi + + else + echo "Zookeeper service is not available for requests" + exit 1 + fi + + ensemble.env: | + # DESIRED_ENSEMBLE_SIZE specifies desired number of nodes in the ensemble. + # Set to an integer >= 1. If unset, scripts default to 3. + # Example: + DESIRED_ENSEMBLE_SIZE=1 + + # Optional: NEED_SCALE_DOWN override. Allow to force scale-down logic during shutdown + # even if the cluster has already the correct size + NEED_SCALE_DOWN=1 + +--- +# Main StatefulSet +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app: zookeeper + name: zookeeper + +spec: +# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention +# requires alpha `StatefulSetAutoDeletePVC=true` feature gate +# persistentVolumeClaimRetentionPolicy: +# whenDeleted: Delete +# whenScaled: Delete + podManagementPolicy: OrderedReady + replicas: 1 + selector: + matchLabels: + app: zookeeper + serviceName: zookeeper-headless + template: + metadata: + generateName: zookeeper + labels: + app: zookeeper + what: node + annotations: + prometheus.io/port: '7000' + prometheus.io/scrape: 'true' + spec: +# affinity: +# podAntiAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# - labelSelector: +# matchExpressions: +# - key: "app" +# operator: In +# values: +# - zookeeper +# # TODO think about multi-AZ EKS +# # topologyKey: topology.kubernetes.io/zone +# topologyKey: "kubernetes.io/hostname" +# nodeAffinity: +# preferredDuringSchedulingIgnoredDuringExecution: +# - preference: +# matchExpressions: +# - key: topology.kubernetes.io/zone +# operator: In +# values: [] # to be added programatically +# weight: 100 + containers: + - name: zookeeper + command: + - /conf/zookeeperStart.sh + image: docker.io/zookeeper:3.8.4 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /conf/zookeeperTeardown.sh + livenessProbe: + exec: + command: + - /conf/zookeeperLive.sh + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 10 + readinessProbe: + exec: + command: + - /conf/zookeeperReady.sh + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 10 + ports: + - containerPort: 2181 + name: client + protocol: TCP + - containerPort: 2888 + name: quorum + protocol: TCP + - containerPort: 3888 + name: leader-election + protocol: TCP + - containerPort: 7000 + name: metrics + protocol: TCP + - containerPort: 8080 + name: admin-server + protocol: TCP + volumeMounts: + - mountPath: /var/lib/zookeeper + name: datadir-volume + - mountPath: /conf + name: conf + restartPolicy: Always + schedulerName: default-scheduler + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + name: zookeeper-scripts + defaultMode: 0755 + name: conf + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + labels: + app: zookeeper + name: datadir-volume + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem diff --git a/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml new file mode 100644 index 000000000..1c56abcdb --- /dev/null +++ b/deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml @@ -0,0 
+1,662 @@ +# Following manifests based on https://github.com/pravega/zookeeper-operator/ +--- +# service for REST administration and reconfiguration +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper-admin-server +spec: + type: ClusterIP + ports: + - name: tcp-admin-server + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: zookeeper + +--- +# service for clickhouse zookeeper client connections +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper +spec: + type: ClusterIP + ports: + - name: tcp-client + port: 2181 + protocol: TCP + targetPort: 2181 + selector: + app: zookeeper + what: node +--- +# headless service for Zookeeper Quorum Election and service name +apiVersion: v1 +kind: Service +metadata: + labels: + app: zookeeper + name: zookeeper-headless +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: false + ports: + - name: tcp-client + port: 2181 + protocol: TCP + targetPort: 2181 + - name: tcp-quorum + port: 2888 + protocol: TCP + targetPort: 2888 + - name: tcp-leader-election + port: 3888 + protocol: TCP + targetPort: 3888 + - name: tcp-metrics + port: 7000 + protocol: TCP + targetPort: 7000 + - name: tcp-admin-server + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: zookeeper +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: zookeeper +spec: + selector: + matchLabels: + app: zookeeper + maxUnavailable: 1 +--- +# ConfigMap with common startup scripts and base config +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: zookeeper + name: zookeeper-scripts +data: + env.sh: | + #!/usr/bin/env bash + export DOMAIN=`hostname -d` + export QUORUM_PORT=2888 + export LEADER_PORT=3888 + export CLIENT_HOST=zookeeper + export CLIENT_PORT=2181 + export ADMIN_SERVER_HOST=zookeeper-admin-server + export ADMIN_SERVER_PORT=8080 + export CLUSTER_NAME=zookeeper + export ZOO_LOG4J_PROP="WARN, CONSOLE" + java.env: | + JVMFLAGS="-Xms128M -Xmx4G -XX:ActiveProcessorCount=8 -XX:+AlwaysPreTouch -Djute.maxbuffer=8388608 -XX:MaxGCPauseMillis=50" + log4j-quiet.properties: | + log4j.rootLogger=CONSOLE + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=ERROR + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + log4j.properties: | + zookeeper.root.logger=CONSOLE + zookeeper.console.threshold=INFO + log4j.rootLogger=${zookeeper.root.logger} + log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender + log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} + log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout + log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + logback.xml: | + + + + + %d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n + + + ${zookeeper.console.threshold} + + + + + + + + zoo.cfg: | + 4lw.commands.whitelist=* + dataDir=/var/lib/zookeeper/data + dataLogDir=/var/lib/zookeeper/datalog + standaloneEnabled=false + reconfigEnabled=true + skipACL=yes + metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider + metricsProvider.httpPort=7000 + metricsProvider.exportJvmInfo=true + tickTime=2000 + initLimit=300 + syncLimit=10 + maxClientCnxns=2000 + maxTimeToWaitForEpoch=2000 + globalOutstandingLimit=1000 + preAllocSize=131072 + snapCount=1500000 + commitLogCount=500 + snapSizeLimitInKb=4194304 
+ maxCnxns=0 + minSessionTimeout=4000 + maxSessionTimeout=40000 + autopurge.snapRetainCount=3 + autopurge.purgeInterval=1 + quorumListenOnAllIPs=false + admin.serverPort=8080 + dynamicConfigFile=/var/lib/zookeeper/zoo.cfg.dynamic + zookeeperFunctions.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + function zkConfig() { + echo "$HOST.$DOMAIN:$QUORUM_PORT:$LEADER_PORT:$ROLE;$CLIENT_PORT" + } + function zkConnectionString() { + # If the client service address is not yet available, then return localhost + set +e + getent hosts "${CLIENT_HOST}" 2>/dev/null 1>/dev/null + if [[ $? -ne 0 ]]; then + set -e + echo "localhost:${CLIENT_PORT}" + else + set -e + echo "${CLIENT_HOST}:${CLIENT_PORT}" + fi + } + zookeeperStart.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + + set -ex + # TODO think how to add nslookup to docker image + # apt-get update && apt-get install --no-install-recommends -y dnsutils procps + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + HOST=`hostname -s` + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + LOG4J_CONF=/conf/log4j-quiet.properties + DYNCONFIG=$VOLUME_DIR/zoo.cfg.dynamic + STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg + + if [[ ! -d "$VOLUME_DIR/data" ]]; then + mkdir -p $VOLUME_DIR/data + chown zookeeper $VOLUME_DIR/data + fi + if [[ ! -d "$VOLUME_DIR/datalog" ]]; then + mkdir -p $VOLUME_DIR/datalog + chown zookeeper $VOLUME_DIR/datalog + fi + + # Extract resource name and this members ordinal value from pod hostname + if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then + NAME=${BASH_REMATCH[1]} + ORD=${BASH_REMATCH[2]} + else + echo Failed to parse name and ordinal of Pod + exit 1 + fi + + MYID=$((ORD+1)) + + # Values for first startup + WRITE_CONFIGURATION=true + REGISTER_NODE=true + ONDISK_MYID_CONFIG=false + ONDISK_DYN_CONFIG=false + + # Check validity of on-disk configuration + if [ -f $MYID_FILE ]; then + EXISTING_ID="`cat $MYID_FILE`" + if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then + # If Id is correct and configuration is present under `/var/lib/zookeeper/conf` + ONDISK_MYID_CONFIG=true + fi + fi + + if [ -f $DYNCONFIG ]; then + ONDISK_DYN_CONFIG=true + fi + + # Determine if there is an ensemble available to join by checking the service domain + set +e + getent hosts $DOMAIN # This only performs a dns lookup + if [[ $? -eq 0 ]]; then + ACTIVE_ENSEMBLE=true + else + ACTIVE_ENSEMBLE=false + fi + # elif nslookup $DOMAIN | grep -q "server can't find $DOMAIN"; then + # echo "there is no active ensemble" + # ACTIVE_ENSEMBLE=false + # else + # If an nslookup of the headless service domain fails, then there is no + # active ensemble yet, but in certain cases nslookup of headless service + # takes a while to come up even if there is active ensemble + # ACTIVE_ENSEMBLE=false + # declare -i count=10 + # while [[ $count -ge 0 ]] + # do + # sleep 1 + # ((count=count-1)) + # getent hosts $DOMAIN + # if [[ $? 
-eq 0 ]]; then + # ACTIVE_ENSEMBLE=true + # break + # fi + # done + # fi + + if [[ "$ONDISK_MYID_CONFIG" == true && "$ONDISK_DYN_CONFIG" == true ]]; then + # If Configuration is present, we assume, there is no need to write configuration. + WRITE_CONFIGURATION=false + else + WRITE_CONFIGURATION=true + fi + + if [[ "$ACTIVE_ENSEMBLE" == false ]]; then + # This is the first node being added to the cluster or headless service not yet available + REGISTER_NODE=false + else + # If ensemble exists, check to see if this node is already a member. + if [[ "$ONDISK_MYID_CONFIG" == false || "$ONDISK_DYN_CONFIG" == false ]]; then + REGISTER_NODE=true + elif [[ -f "$STATIC_CONFIG" ]]; then + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + if [[ "0" == $(grep -c "server.${MYID}=" "${DYN_CFG_FILE}") ]]; then + REGISTER_NODE=true + else + REGISTER_NODE=false + fi + else + REGISTER_NODE=false + fi + fi + + if [[ "$WRITE_CONFIGURATION" == true ]]; then + echo "Writing myid: $MYID to: $MYID_FILE." + echo $MYID > $MYID_FILE + if [[ $MYID -eq 1 ]]; then + ROLE=participant + echo Initial initialization of ordinal 0 pod, creating new config. + ZKCONFIG=$(zkConfig) + echo Writing bootstrap configuration with the following config: + echo $ZKCONFIG + echo $MYID > $MYID_FILE + echo "server.${MYID}=${ZKCONFIG}" > $DYNCONFIG + fi + fi + + export ZOOCFGDIR=/var/lib/zookeeper/conf + if [[ "$REGISTER_NODE" == true ]]; then + ROLE=observer + ZKURL=$(zkConnectionString) + ZKCONFIG=$(zkConfig) + + set -e + echo Registering node and writing local configuration to disk. + zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" | grep -E '^server\.[0-9]+=' > $DYNCONFIG + if ls $ZOOCFGDIR/zoo.cfg.dynamic.* 1> /dev/null 2>&1; then + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + cp -fv $DYNCONFIG $DYN_CFG_FILE + fi + set +e + fi + + + echo Copying /conf contents to writable directory, to support Zookeeper dynamic reconfiguration + if [[ ! -d "$ZOOCFGDIR" ]]; then + mkdir $ZOOCFGDIR + cp -f /conf/zoo.cfg $ZOOCFGDIR + else + echo Copying the /conf/zoo.cfg contents except the dynamic config file during restart + echo -e "$( head -n -1 /conf/zoo.cfg )""\n""$( tail -n 1 "$STATIC_CONFIG" )" > $STATIC_CONFIG + fi + cp -f /conf/log4j.properties $ZOOCFGDIR + cp -f /conf/log4j-quiet.properties $ZOOCFGDIR + cp -f /conf/logback.xml $ZOOCFGDIR + cp -f /conf/env.sh $ZOOCFGDIR + cp -f /conf/java.env $ZOOCFGDIR + + if [ -f $DYNCONFIG ]; then + # Node registered, start server + echo Starting zookeeper service + zkServer.sh --config $ZOOCFGDIR start-foreground + else + echo "Node failed to register!" + exit 1 + fi + + zookeeperTeardown.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. 
+ # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + + set -ex + exec > /proc/1/fd/1 + exec 2> /proc/1/fd/2 + + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + + # Check to see if zookeeper service for this node is a participant + set +e + ZKURL=$(zkConnectionString) + set -e + MYID=`cat $MYID_FILE` + CLUSTER_SIZE=$(zkCli.sh -server ${ZKURL} config | grep -c -E '^server\.[0-9]+=') + echo "CLUSTER_SIZE=$CLUSTER_SIZE, MyId=$MYID" + + # Optional desired ensemble size. + if [[ -f /conf/ensemble.env ]]; then + set +e + source /conf/ensemble.env + set -e + fi + DESIRED_ENSEMBLE_SIZE=${DESIRED_ENSEMBLE_SIZE:-3} + echo "DESIRED_ENSEMBLE_SIZE=$DESIRED_ENSEMBLE_SIZE" + + # NEED_SCALE_DOWN can be set explicitly(to 1). Otherwise, set it automatically + # when the desired ensemble size is smaller than the current cluster size. + if [[ "$DESIRED_ENSEMBLE_SIZE" -lt "$CLUSTER_SIZE" ]]; then + echo "$DESIRED_ENSEMBLE_SIZE < $CLUSTER_SIZE, setting NEED_SCALE_DOWN" + NEED_SCALE_DOWN=1 + fi + + if [[ "$MYID" -le "1" ]]; then + echo "For id=1 scale down is forbidden" + NEED_SCALE_DOWN=0 + fi + + if [[ "$NEED_SCALE_DOWN" == "1" ]]; then + echo "NEED_SCALE_DOWN=1" + + set +e + zkCli.sh -server ${ZKURL} reconfig -remove $MYID + echo $? + set -e + + # give some time to propagate the changes + sleep 2 + fi + + # Kill the primary process ourselves to circumvent the terminationGracePeriodSeconds + ps -ef | grep zoo.cfg | grep -v grep | awk '{print $2}' | xargs kill + + + zookeeperLive.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + source /conf/env.sh + OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;) + # Check to see if zookeeper service answers + if [[ "$OK" == "imok" ]]; then + exit 0 + else + exit 1 + fi + + zookeeperReady.sh: | + #!/usr/bin/env bash + # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # https://www.apache.org/licenses/LICENSE-2.0 + set -ex + source /conf/env.sh + source /conf/zookeeperFunctions.sh + + HOST=`hostname -s` + VOLUME_DIR=/var/lib/zookeeper + MYID_FILE=$VOLUME_DIR/data/myid + LOG4J_CONF=/conf/log4j-quiet.properties + STATIC_CONFIG=$VOLUME_DIR/conf/zoo.cfg + + OK=$(exec 3<>/dev/tcp/127.0.0.1/${CLIENT_PORT} ; printf "ruok" >&3 ; IFS=; tee <&3; exec 3<&- ;) + + # Check to see if zookeeper service answers + if [[ "$OK" == "imok" ]]; then + set +e + getent hosts $DOMAIN + if [[ $? -ne 0 ]]; then + set -e + echo "There is no active ensemble, skipping readiness probe..." + exit 0 + else + set -e + # An ensemble exists, check to see if this node is already a member. 
+ # Check to see if zookeeper service for this node is a participant + set +e + # Extract resource name and this members' ordinal value from pod hostname + HOST=`hostname -s` + if [[ $HOST =~ (.*)-([0-9]+)$ ]]; then + NAME=${BASH_REMATCH[1]} + ORD=${BASH_REMATCH[2]} + else + echo Failed to parse name and ordinal of Pod + exit 1 + fi + MYID=$((ORD+1)) + ONDISK_CONFIG=false + if [ -f $MYID_FILE ]; then + EXISTING_ID="`cat $MYID_FILE`" + if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then + #If Id is correct and configuration is present under `/var/lib/zookeeper/conf` + ONDISK_CONFIG=true + DYN_CFG_FILE_LINE=`cat $STATIC_CONFIG|grep "dynamicConfigFile\="` + DYN_CFG_FILE=${DYN_CFG_FILE_LINE##dynamicConfigFile=} + SERVER_FOUND=`cat $DYN_CFG_FILE | grep -c "server.${MYID}="` + if [[ "$SERVER_FOUND" == "0" ]]; then + echo "Server not found in ensemble. Exiting ..." + exit 1 + fi + SERVER=`cat $DYN_CFG_FILE | grep "server.${MYID}="` + if [[ "$SERVER" == *"participant"* ]]; then + ROLE=participant + elif [[ "$SERVER" == *"observer"* ]]; then + ROLE=observer + fi + fi + fi + + if [[ "$ROLE" == "participant" ]]; then + echo "Zookeeper service is available and an active participant" + exit 0 + elif [[ "$ROLE" == "observer" ]]; then + echo "Zookeeper service is ready to be upgraded from observer to participant." + ROLE=participant + ZKURL=$(zkConnectionString) + ZKCONFIG=$(zkConfig) + + # Note: according to https://zookeeper.apache.org/doc/r3.9.3/zookeeperReconfig.html + # > One can modify any of the ports of a server, or its role (participant/observer) by adding it to the ensemble with different parameters. + # > This works in both the incremental and the bulk reconfiguration modes. + # > It is not necessary to remove the server and then add it back; + # > just specify the new parameters as if the server is not yet in the system. + # > The server will detect the configuration change and perform the necessary adjustments. + + zkCli.sh -server ${ZKURL} reconfig -add "\nserver.$MYID=$ZKCONFIG" + exit 0 + else + echo "Something has gone wrong. Unable to determine zookeeper role." + exit 1 + fi + fi + + else + echo "Zookeeper service is not available for requests" + exit 1 + fi + + ensemble.env: | + # DESIRED_ENSEMBLE_SIZE specifies desired number of nodes in the ensemble. + # Set to an integer >= 1. If unset, scripts default to 3. + # Example: + DESIRED_ENSEMBLE_SIZE=3 + + # Optional: NEED_SCALE_DOWN override. 
Allow to force scale-down logic during shutdown + # even if the cluster has already the correct size + NEED_SCALE_DOWN=1 + +--- +# Main StatefulSet +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app: zookeeper + name: zookeeper + +spec: +# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention +# requires alpha `StatefulSetAutoDeletePVC=true` feature gate +# persistentVolumeClaimRetentionPolicy: +# whenDeleted: Delete +# whenScaled: Delete + podManagementPolicy: OrderedReady + replicas: 3 + selector: + matchLabels: + app: zookeeper + serviceName: zookeeper-headless + template: + metadata: + generateName: zookeeper + labels: + app: zookeeper + what: node + annotations: + prometheus.io/port: '7000' + prometheus.io/scrape: 'true' + spec: +# affinity: +# podAntiAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# - labelSelector: +# matchExpressions: +# - key: "app" +# operator: In +# values: +# - zookeeper +# # TODO think about multi-AZ EKS +# # topologyKey: topology.kubernetes.io/zone +# topologyKey: "kubernetes.io/hostname" +# nodeAffinity: +# preferredDuringSchedulingIgnoredDuringExecution: +# - preference: +# matchExpressions: +# - key: topology.kubernetes.io/zone +# operator: In +# values: [] # to be added programatically +# weight: 100 + containers: + - name: zookeeper + command: + - /conf/zookeeperStart.sh + image: docker.io/zookeeper:3.8.4 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /conf/zookeeperTeardown.sh + livenessProbe: + exec: + command: + - /conf/zookeeperLive.sh + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 10 + readinessProbe: + exec: + command: + - /conf/zookeeperReady.sh + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 10 + ports: + - containerPort: 2181 + name: client + protocol: TCP + - containerPort: 2888 + name: quorum + protocol: TCP + - containerPort: 3888 + name: leader-election + protocol: TCP + - containerPort: 7000 + name: metrics + protocol: TCP + - containerPort: 8080 + name: admin-server + protocol: TCP + volumeMounts: + - mountPath: /var/lib/zookeeper + name: datadir-volume + - mountPath: /conf + name: conf + restartPolicy: Always + schedulerName: default-scheduler + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + name: zookeeper-scripts + defaultMode: 0755 + name: conf + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + labels: + app: zookeeper + name: datadir-volume + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem diff --git a/tests/e2e/test_keeper.py b/tests/e2e/test_keeper.py index ac31ea246..71d3251a2 100644 --- a/tests/e2e/test_keeper.py +++ b/tests/e2e/test_keeper.py @@ -1,4 +1,5 @@ -import time +import os +os.environ["TEST_NAMESPACE"]="test-keeper" import e2e.clickhouse as clickhouse import e2e.kubectl as kubectl @@ -59,7 +60,7 @@ def insert_replicated_data(chi, pod_for_insert_data, create_tables, insert_table ) -def check_zk_root_znode(chi, keeper_type, pod_count, retry_count=15): +def check_zk_root_znode(chi, keeper_type, pod_count, ns, retry_count=15): for pod_num in range(pod_count): found = False for i in range(retry_count): @@ -82,7 +83,7 @@ def check_zk_root_znode(chi, keeper_type, pod_count, retry_count=15): out = kubectl.launch( f"exec {pod_prefix}-{pod_num} -- bash -ce '{keeper_cmd}'", - ns=settings.test_namespace, + ns=ns, 
ok_to_fail=True, ) found = False @@ -144,17 +145,17 @@ def rescale_zk_and_clickhouse( return chi -def delete_keeper_pvc(keeper_type): +def delete_keeper_pvc(keeper_type, ns): pvc_list = kubectl.get( kind="pvc", name="", label=f"-l app={keeper_type}", - ns=settings.test_namespace, + ns=ns, ok_to_fail=False, ) for pvc in pvc_list["items"]: if pvc["metadata"]["name"][-2:] != "-0": - kubectl.launch(f"delete pvc {pvc['metadata']['name']}", ns=settings.test_namespace) + kubectl.launch(f"delete pvc {pvc['metadata']['name']}", ns=ns) def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper_type, keeper_manifest_1_node, @@ -167,6 +168,8 @@ def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper keeper_manifest = f"../../deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/{keeper_manifest}" if keeper_type == "clickhouse-keeper": keeper_manifest = f"../../deploy/clickhouse-keeper/clickhouse-keeper-manually/{keeper_manifest}" + if keeper_type == "clickhouse-keeper_with_chk": + keeper_manifest = f"../../deploy/clickhouse-keeper/clickhouse-keeper-with-CHK-resource/{keeper_manifest}" if keeper_type == "zookeeper-operator": keeper_manifest = f"../../deploy/zookeeper/zookeeper-with-zookeeper-operator/{keeper_manifest}" @@ -189,14 +192,16 @@ def start_stop_zk_and_clickhouse(chi_name, ch_stop, keeper_replica_count, keeper f"Pods expected={keeper_replica_count} actual={pod_counts}, wait {3*(i+1)} seconds"): time.sleep(3*(i+1)) +def is_need_delete_keeper_pvc(keeper_type, keeper_manifest_1_node): + return keeper_type == "zookeeper" and ("scaleout-pvc" in keeper_manifest_1_node or "manual-teardown" in keeper_manifest_1_node) @TestOutline def test_keeper_rescale_outline( self, - keeper_type="zookeeper", - pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0", - keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only.yaml", - keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only.yaml", + keeper_type, + pod_for_insert_data, + keeper_manifest_1_node, + keeper_manifest_3_node, ): """ test scenario for Zoo/Clickhouse Keeper @@ -209,8 +214,8 @@ def test_keeper_rescale_outline( """ with When("Clean exists ClickHouse Keeper and ZooKeeper"): - kubectl.delete_all_keeper(settings.test_namespace) - kubectl.delete_all_chi(settings.test_namespace) + kubectl.delete_all_keeper(self.context.test_namespace) + kubectl.delete_all_chi(self.context.test_namespace) with When("Install CH 1 node ZK 1 node"): chi = rescale_zk_and_clickhouse( @@ -223,7 +228,7 @@ def test_keeper_rescale_outline( ) util.wait_clickhouse_cluster_ready(chi) wait_keeper_ready(keeper_type=keeper_type, pod_count=1) - check_zk_root_znode(chi, keeper_type, pod_count=1) + check_zk_root_znode(chi, keeper_type, pod_count=1, ns=self.context.test_namespace) util.wait_clickhouse_no_readonly_replicas(chi) insert_replicated_data( chi, @@ -244,7 +249,7 @@ def test_keeper_rescale_outline( keeper_manifest_3_node=keeper_manifest_3_node, ) wait_keeper_ready(keeper_type=keeper_type, pod_count=3) - check_zk_root_znode(chi, keeper_type, pod_count=3) + check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace) util.wait_clickhouse_cluster_ready(chi) util.wait_clickhouse_no_readonly_replicas(chi) @@ -264,9 +269,9 @@ def test_keeper_rescale_outline( keeper_manifest_3_node=keeper_manifest_3_node, ) wait_keeper_ready(keeper_type=keeper_type, pod_count=1) - check_zk_root_znode(chi, keeper_type, pod_count=1) + check_zk_root_znode(chi, keeper_type, pod_count=1, 
ns=self.context.test_namespace) if keeper_type == "zookeeper" and "scaleout-pvc" in keeper_manifest_1_node: - delete_keeper_pvc(keeper_type=keeper_type) + delete_keeper_pvc(keeper_type=keeper_type, ns=self.context.test_namespace) util.wait_clickhouse_cluster_ready(chi) util.wait_clickhouse_no_readonly_replicas(chi) @@ -285,7 +290,7 @@ def test_keeper_rescale_outline( keeper_manifest_1_node=keeper_manifest_1_node, keeper_manifest_3_node=keeper_manifest_3_node, ) - check_zk_root_znode(chi, keeper_type, pod_count=3) + check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace) for keeper_replica_count in [1, 3]: with When("Stop CH + ZK"): @@ -308,7 +313,7 @@ def test_keeper_rescale_outline( ) with Then("check data in tables"): - check_zk_root_znode(chi, keeper_type, pod_count=3) + check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace) util.wait_clickhouse_cluster_ready(chi) util.wait_clickhouse_no_readonly_replicas(chi) for table_name, exptected_rows in { @@ -364,38 +369,48 @@ def test_clickhouse_keeper_rescale_chk(self): ) -@TestScenario -@Name("test_zookeeper_operator_rescale. Check Zookeeper OPERATOR scale-up / scale-down cases") -def test_zookeeper_operator_rescale(self): - test_keeper_rescale_outline( - keeper_type="zookeeper-operator", - pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0", - keeper_manifest_1_node="zookeeper-operator-1-node.yaml", - keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml", - ) +# @TestScenario +# @Name("test_zookeeper_operator_rescale. Check Zookeeper OPERATOR scale-up / scale-down cases") +# def test_zookeeper_operator_rescale(self): +# test_keeper_rescale_outline( +# keeper_type="zookeeper-operator", +# pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0", +# keeper_manifest_1_node="zookeeper-operator-1-node.yaml", +# keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml", +# ) + + +# @TestScenario +# @Name("test_zookeeper_pvc_scaleout_rescale. Check ZK+PVC scale-up / scale-down cases") +# def test_zookeeper_pvc_scaleout_rescale(self): +# test_keeper_rescale_outline( +# keeper_type="zookeeper", +# pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0", +# keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml", +# keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml", +# ) @TestScenario -@Name("test_zookeeper_pvc_scaleout_rescale. Check ZK+PVC scale-up / scale-down cases") -def test_zookeeper_pvc_scaleout_rescale(self): +@Name("test_zookeeper_manual_teardown_rescale. 
Check ZK+Manual TEARDOWN scale-up / scale-down cases") +def test_zookeeper_manual_teardown_rescale(self): test_keeper_rescale_outline( keeper_type="zookeeper", pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0", - keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml", - keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml", + keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-manual-teardown.yaml", + keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml", ) - @TestOutline def test_keeper_probes_outline( self, - keeper_type="zookeeper", - keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only.yaml", - keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only.yaml", + keeper_type, + keeper_manifest_1_node, + keeper_manifest_3_node, ): with When("Clean exists ClickHouse Keeper and ZooKeeper"): - kubectl.delete_all_chi(settings.test_namespace) - kubectl.delete_all_keeper(settings.test_namespace) + kubectl.delete_all_chi(self.context.test_namespace) + kubectl.delete_all_keeper(self.context.test_namespace) with Then("Install CH 2 node ZK 3 node"): chi = rescale_zk_and_clickhouse( @@ -409,7 +424,7 @@ def test_keeper_probes_outline( ) util.wait_clickhouse_cluster_ready(chi) wait_keeper_ready(keeper_type=keeper_type, pod_count=3) - check_zk_root_znode(chi, keeper_type, pod_count=3) + check_zk_root_znode(chi, keeper_type, pod_count=3, ns=self.context.test_namespace) util.wait_clickhouse_no_readonly_replicas(chi) with Then("Create keeper_bench table"): @@ -430,7 +445,7 @@ def test_keeper_probes_outline( max_parts_in_total=1000000; """, ) - with Then("Insert data to keeper_bench for make zookeeper workload"): + with Then("Insert data to keeper_bench for make keeper workload"): pod_prefix = "chi-test-cluster-for-zk-default" rows = 100000 for pod in ("0-0-0", "0-1-0"): @@ -476,34 +491,34 @@ def test_zookeeper_probes_workload(self): ) -@TestScenario -@Name( - "test_zookeeper_pvc_probes_workload. Liveness + Readiness probes shall works fine " - "under workload in multi-datacenter installation" -) -def test_zookeeper_pvc_probes_workload(self): - test_keeper_probes_outline( - keeper_type="zookeeper", - keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml", - keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml", - ) - - -@TestScenario -@Name( - "test_zookeeper_operator_probes_workload. Liveness + Readiness probes shall works fine " - "under workload in multi-datacenter installation" -) -def test_zookeeper_operator_probes_workload(self): - test_keeper_probes_outline( - keeper_type="zookeeper-operator", - keeper_manifest_1_node="zookeeper-operator-1-node.yaml", - keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml", - - # uncomment only if you know how to use it - # keeper_manifest_1_node='zookeeper-operator-1-node-with-custom-probes.yaml', - # keeper_manifest_3_node='zookeeper-operator-3-nodes-with-custom-probes.yaml', - ) +# @TestScenario +# @Name( +# "test_zookeeper_pvc_probes_workload. Liveness + Readiness probes shall works fine " +# "under workload in multi-datacenter installation" +# ) +# def test_zookeeper_pvc_probes_workload(self): +# test_keeper_probes_outline( +# keeper_type="zookeeper", +# keeper_manifest_1_node="zookeeper-1-node-1GB-for-tests-only-scaleout-pvc.yaml", +# keeper_manifest_3_node="zookeeper-3-nodes-1GB-for-tests-only-scaleout-pvc.yaml", +# ) + + +# @TestScenario +# @Name( +# "test_zookeeper_operator_probes_workload. 
Liveness + Readiness probes shall works fine " +# "under workload in multi-datacenter installation" +# ) +# def test_zookeeper_operator_probes_workload(self): +# test_keeper_probes_outline( +# keeper_type="zookeeper-operator", +# keeper_manifest_1_node="zookeeper-operator-1-node.yaml", +# keeper_manifest_3_node="zookeeper-operator-3-nodes.yaml", +# +# # uncomment only if you know how to use it +# # keeper_manifest_1_node='zookeeper-operator-1-node-with-custom-probes.yaml', +# # keeper_manifest_3_node='zookeeper-operator-3-nodes-with-custom-probes.yaml', +# ) @TestScenario @@ -521,7 +536,7 @@ def test_clickhouse_keeper_probes_workload(self): @TestScenario @Name( - "test_clickhouse_keeper_probes_workload_with_CHKI. Liveness + Readiness probes shall works fine " + "test_clickhouse_keeper_probes_workload_with_chk. Liveness + Readiness probes shall works fine " "under workload in multi-datacenter installation" ) @Requirements(RQ_SRS_026_ClickHouseOperator_CustomResource_Kind_ClickHouseKeeperInstallation("1.0")) @@ -538,27 +553,31 @@ def test_clickhouse_keeper_probes_workload_with_chk(self): def test(self): with Given("set settings"): set_settings() - self.context.test_namespace = "test" - self.context.operator_namespace = "test" + self.context.test_namespace = "test-keeper" + self.context.operator_namespace = "test-keeper" with Given("I create shell"): shell = get_shell() self.context.shell = shell + + util.clean_namespace(delete_chi=True, delete_keeper=True) + util.install_operator_if_not_exist() + all_tests = [ - test_zookeeper_operator_rescale, + # test_zookeeper_operator_rescale, + # test_zookeeper_pvc_scaleout_rescale, test_clickhouse_keeper_rescale, test_clickhouse_keeper_rescale_chk, - test_zookeeper_pvc_scaleout_rescale, + test_zookeeper_manual_teardown_rescale, test_zookeeper_rescale, + # test_zookeeper_pvc_probes_workload, + # test_zookeeper_operator_probes_workload, test_zookeeper_probes_workload, - test_zookeeper_pvc_probes_workload, - test_zookeeper_operator_probes_workload, test_clickhouse_keeper_probes_workload, test_clickhouse_keeper_probes_workload_with_chk, ] - - util.clean_namespace(delete_chi=True, delete_keeper=True) - util.install_operator_if_not_exist() for t in all_tests: Scenario(test=t)() + + util.clean_namespace(delete_chi=True, delete_keeper=True) diff --git a/tests/e2e/util.py b/tests/e2e/util.py index 97aa98691..e99f05750 100644 --- a/tests/e2e/util.py +++ b/tests/e2e/util.py @@ -100,7 +100,7 @@ def require_keeper(keeper_manifest="", keeper_type=settings.keeper_type, force_i if doc["kind"] in ("StatefulSet", "ZookeeperCluster"): keeper_nodes = doc["spec"]["replicas"] expected_docs = { - "zookeeper": 5 if "scaleout-pvc" in keeper_manifest else 4, + "zookeeper": 5 if "scaleout-pvc" in keeper_manifest else 6 if "manual-teardown" in keeper_manifest else 4, "clickhouse-keeper": 7, "chk": 2, "zookeeper-operator": 3 if "probes" in keeper_manifest else 1,
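For local verification outside CI, the new manual-teardown manifests can be exercised directly. A minimal sketch, assuming a cluster reachable via kubectl, the test-keeper namespace that tests/e2e/test_keeper.py now uses, and that zkCli.sh is on PATH inside the docker.io/zookeeper:3.8.4 image (as the ConfigMap scripts already assume):

# Deploy the 3-node manual-teardown ensemble added in this change
kubectl -n test-keeper apply -f deploy/zookeeper/zookeeper-manually/quick-start-persistent-volume/zookeeper-3-nodes-1GB-for-tests-only-manual-teardown.yaml
kubectl -n test-keeper rollout status statefulset/zookeeper --timeout=300s

# Scale down; the preStop hook (zookeeperTeardown.sh) should issue `reconfig -remove`
# for departing ids > 1, since /conf/ensemble.env in this manifest sets NEED_SCALE_DOWN=1
kubectl -n test-keeper scale statefulset/zookeeper --replicas=1

# Inspect the remaining dynamic ensemble configuration
kubectl -n test-keeper exec zookeeper-0 -- zkCli.sh -server localhost:2181 config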