|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
// Regression suite: restart the cluster from a cloned instance built out of the
// latest cluster snapshot.
//
// Steps, in order:
//   1. Skip unless both enableClusterSnapshot and enableMultiVersionStatus are set.
//   2. Look up the current instance id via the meta-service HTTP endpoint.
//   3. Pick the last row of information_schema.cluster_snapshots.
//   4. Locate the master FE and all BE hosts.
//   5. Wipe BE storage, stop the master FE, wipe doris-meta, stop every BE.
//   6. Write snapshot_info.json on the master FE host and start the FE with it.
//   7. Reconnect and start every BE again.
//
// WARNING: this suite is destructive -- it deletes BE storage and FE metadata
// over ssh as root.
suite("test_recycler_clone_instance") {
    def enableMultiVersionStatus = context.config.enableMultiVersionStatus
    def enableClusterSnapshot = context.config.enableClusterSnapshot
    def metaServiceHttpAddress = context.config.metaServiceHttpAddress
    def dbName = "regression_test_cloud_p0_recycler"

    if (!enableClusterSnapshot || !enableMultiVersionStatus) {
        logger.info("enableClusterSnapshot or enableMultiVersionStatus is not true, skip clone instance")
        return
    }

    // Queries the meta-service `get_instance` endpoint on `host`.
    // Returns result.instance_id from the JSON response, or null when the
    // response has no such key. Fails the suite if curl or the service reports
    // an error.
    // NOTE(review): `token` and `instance_id` are not defined anywhere in this
    // file; they are presumably resolved from the enclosing suite context --
    // confirm they exist, otherwise this closure fails on first use.
    def get_instance_id = { host ->
        def url = "http://${host}/MetaService/http/get_instance?token=${token}&instance_id=${instance_id}"
        def (code, out, err) = curl('GET', url, null, 180)
        assert code == 0 : "Failed to get instance: ${out} ${err}"
        def json = parseJson(out)
        assert json["code"] == "OK" : "Get instance failed: ${out} ${err}"
        return json["result"].containsKey("instance_id") ? json["result"]["instance_id"] : null
    }

    sql "CREATE DATABASE IF NOT EXISTS ${dbName}"

    def instanceId = get_instance_id(metaServiceHttpAddress)
    // Fail before any destructive step: a null id would otherwise be
    // concatenated into "null_cloned" further down.
    assert instanceId != null, "Meta service returned no instance_id, cannot clone instance"
    def username = "root"

    // Builds the local command line that runs `remoteCmd` on `host` over ssh.
    // The remote command is wrapped in double quotes, so it must not itself
    // contain unescaped double quotes.
    def sshCommand = { host, remoteCmd ->
        "ssh -o StrictHostKeyChecking=no ${username}@${host} \"${remoteCmd}\"".toString()
    }

    logger.info("Querying cluster snapshots")
    def snapshotResult = sql """ SELECT * FROM information_schema.cluster_snapshots """
    logger.info("Snapshot result: ${snapshotResult}")

    assert snapshotResult != null && !snapshotResult.isEmpty(), "No snapshots found, cannot run this test"

    // The last returned row is treated as the latest snapshot.
    // NOTE(review): column 0 is used as the snapshot id and column 9 as the
    // label -- verify against the cluster_snapshots schema.
    def latestSnapshot = snapshotResult[-1]
    def snapshotId = latestSnapshot[0]
    def snapshotLabel = latestSnapshot[9]
    logger.info("Latest snapshot_id: ${snapshotId}, snapshot_label: ${snapshotLabel}")

    logger.info("Getting FE information")
    def feResult = sql """ SHOW FRONTENDS """
    logger.info("FE result: ${feResult}")

    // NOTE(review): column 8 is assumed to be the IsMaster flag and column 1
    // the host -- confirm against the SHOW FRONTENDS output of this version.
    def masterFe = feResult.find { fe -> fe[8] == "true" }

    assert masterFe != null, "No master FE found"

    def masterFeHost = masterFe[1]
    logger.info("Master FE host: ${masterFeHost}")

    def dorisHome = System.getenv("DORIS_HOME")
    logger.info("DORIS_HOME: ${dorisHome}")

    assert dorisHome != null, "DORIS_HOME is not set"

    // Runs `cmd` through `/bin/bash -c` with stderr merged into stdout and
    // returns everything the command printed.
    //   cmd     - shell command line to execute
    //   mustSuc - when true, a non-zero exit code raises an Exception;
    //             otherwise the failure is only logged
    // Throws Exception when the process cannot be started or read, or when
    // mustSuc is true and the command exits non-zero.
    def executeCommand = { String cmd, Boolean mustSuc ->
        try {
            logger.info("Execute: ${cmd}")
            def proc = new ProcessBuilder("/bin/bash", "-c", cmd).redirectErrorStream(true).start()
            // Drain the output BEFORE waiting: a child that fills the pipe
            // buffer blocks on write, and waiting first would then deadlock.
            def output = proc.text
            int exitcode = proc.waitFor()
            if (exitcode != 0) {
                logger.info("Exit code: ${exitcode}, output: ${output}")
                if (mustSuc) {
                    throw new Exception("Command failed with exit code ${exitcode}: ${output}")
                }
            }
            return output
        } catch (IOException e) {
            // No timeout is applied here; an IOException means the process
            // could not be started or its output could not be read.
            logger.error("Execute failed: ${e.message}")
            throw new Exception("Execute failed: ${e.message}", e)
        }
    }

    logger.info("Getting BE information and stopping all BEs and FEs")
    def beResult = sql """ SHOW BACKENDS """
    logger.info("BE result: ${beResult}")

    // NOTE(review): column 1 is assumed to be the BE host -- confirm.
    def beHosts = beResult.collect { be -> be[1] }

    logger.info("BE hosts: ${beHosts}")

    // Storage is wiped while the FE is still up; the ordering of the
    // following steps is kept exactly as-is on purpose.
    logger.info("Cleaning storage directory on all BEs")
    def storagePath = "${dorisHome}/be/storage/*"
    deleteRemotePathOnAllBE(username, storagePath)
    logger.info("Cleaned storage directory on all BEs")

    logger.info("Stopping master FE on ${masterFeHost} and cleaning doris-meta directory")
    executeCommand(sshCommand(masterFeHost, "${dorisHome}/fe/bin/stop_fe.sh"), true)
    logger.info("Master FE stopped successfully")

    executeCommand(sshCommand(masterFeHost, "rm -rf ${dorisHome}/fe/doris-meta/*"), true)
    logger.info("Cleaned doris-meta directory")

    beHosts.each { beHost ->
        logger.info("Stopping BE on ${beHost}")
        executeCommand(sshCommand(beHost, "${dorisHome}/be/bin/stop_be.sh"), true)
        logger.info("BE on ${beHost} stopped successfully")
    }

    logger.info("Creating snapshot_info.json")

    def newInstanceId = instanceId + "_cloned"
    def instanceName = "cloned_instance"

    def snapshotInfoJson = """{"from_instance_id":"${instanceId}","from_snapshot_id":"${snapshotId}","instance_id":"${newInstanceId}","name":"${instanceName}","is_successor":true}"""

    logger.info("Snapshot info JSON content: ${snapshotInfoJson}")

    // Ship the JSON as base64 so its double quotes survive the nested
    // local-shell / ssh / remote-shell quoting.
    def jsonBase64 = snapshotInfoJson.bytes.encodeBase64().toString()

    executeCommand(sshCommand(masterFeHost, "echo '${jsonBase64}' | base64 -d > ${dorisHome}/fe/snapshot_info.json"), true)
    logger.info("Created snapshot_info.json successfully")

    def jsonContent = executeCommand(sshCommand(masterFeHost, "cat ${dorisHome}/fe/snapshot_info.json"), true)
    logger.info("Verified snapshot_info.json content: ${jsonContent}")

    logger.info("Starting FE with snapshot_info.json")
    // NOTE(review): the file was written to ${dorisHome}/fe/ but is passed
    // here as a relative name after `cd fe/bin` -- confirm how start_fe.sh
    // resolves the --cluster_snapshot argument.
    executeCommand(sshCommand(masterFeHost, "cd ${dorisHome}/fe/bin && ./start_fe.sh --daemon --cluster_snapshot snapshot_info.json"), true)
    logger.info("Master FE started successfully with cluster snapshot")

    logger.info("Waiting for FE to be ready...")
    Thread.sleep(60000)

    context.reconnectFe()

    logger.info("Starting all BEs")
    beHosts.each { beHost ->
        logger.info("Starting BE on ${beHost}")
        executeCommand(sshCommand(beHost, "cd ${dorisHome}/be/bin && ./start_be.sh --daemon"), true)
        logger.info("BE on ${beHost} started successfully")
    }

    logger.info("Waiting for BEs to be ready...")
    Thread.sleep(60000)

    logger.info("All steps completed successfully. FE and BEs have been restarted with cloned instance.")
}
0 commit comments