Skip to content

Commit 92913f3

Browse files
authored
[feat](test) Add clone instance test for recycler (#58773)
1 parent 6c3c551 commit 92913f3

File tree

2 files changed

+173
-0
lines changed

2 files changed

+173
-0
lines changed

regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th
6868
"test_recycler_with_txn_label," +
6969
"test_recycler," +
7070
"test_recycler_cleanup_snapshot," +
71+
"test_recycler_clone_instance," +
7172
"zzz_the_end_sentinel_do_not_touch" // keep this line as the last line
7273

7374
excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("test_recycler_clone_instance") {
19+
// Values consumed by later steps of this suite.
def dbName = "regression_test_cloud_p0_recycler"
def metaServiceHttpAddress = context.config.metaServiceHttpAddress

// The clone-instance flow only runs when both feature switches are truthy;
// otherwise the suite logs the reason and exits early.
def clusterSnapshotOn = context.config.enableClusterSnapshot
def multiVersionStatusOn = context.config.enableMultiVersionStatus
if (!(clusterSnapshotOn && multiVersionStatusOn)) {
    logger.info("enableClusterSnapshot or enableMultiVersionStatus is not true, skip clone instance")
    return
}
29+
// Queries the meta-service `get_instance` HTTP endpoint and returns the instance id it reports.
//   host    - address of the meta-service HTTP endpoint, interpolated into the URL as-is.
//   returns - `result.instance_id` from the JSON reply, or null when the reply carries none.
// Fails via assert when curl exits non-zero or the reply's "code" is not "OK".
// NOTE(review): `token` and `instance_id` are not declared inside this suite; presumably they
// resolve to properties of the enclosing regression Suite - confirm.
def get_instance_id = { host ->
    def url = "http://${host}/MetaService/http/get_instance?token=${token}&instance_id=${instance_id}"
    def (code, out, err) = curl('GET', url, null, 180)
    // The message names the operation actually performed (it used to mention multi version status).
    assert code == 0 : "Failed to get instance: ${out} ${err}"
    def json = parseJson(out)
    assert json["code"] == "OK" : "Get instance failed: ${out} ${err}"
    // An "OK" reply without a result object is treated the same as a missing instance_id.
    def result = json["result"]
    if (result != null && result.containsKey("instance_id")) {
        return result["instance_id"]
    }
    return null
}
40+
41+
sql "CREATE DATABASE IF NOT EXISTS ${dbName}"

def instanceId = get_instance_id(metaServiceHttpAddress)
// get_instance_id returns null when the reply has no instance_id. Fail here rather than
// letting the later `instanceId + "_cloned"` build a bogus id from a null value.
assert instanceId != null, "Failed to resolve instance id from meta service ${metaServiceHttpAddress}"
def username = "root"

logger.info("Querying cluster snapshots")
def snapshotResult = sql """ SELECT * FROM information_schema.cluster_snapshots """
logger.info("Snapshot result: ${snapshotResult}")

assert snapshotResult != null && !snapshotResult.isEmpty(), "No snapshots found, cannot run this test"

// The last returned row is used as the "latest" snapshot.
// NOTE(review): the query has no ORDER BY - confirm the result order is guaranteed.
def latestSnapshot = snapshotResult[-1]
// NOTE(review): columns 0 and 9 are presumably the snapshot id and label - verify against the table schema.
def snapshotId = latestSnapshot[0]
def snapshotLabel = latestSnapshot[9]
logger.info("Latest snapshot_id: ${snapshotId}, snapshot_label: ${snapshotLabel}")
56+
57+
logger.info("Getting FE information")
def frontendRows = sql """ SHOW FRONTENDS """
logger.info("FE result: ${frontendRows}")

// Pick the first row whose column 8 equals the string "true".
// NOTE(review): column 8 is presumably the IsMaster flag and column 1 the host - confirm.
def masterRow = frontendRows.find { row -> row[8] == "true" }
assert masterRow != null, "No master FE found"

def masterFeHost = masterRow[1]
logger.info("Master FE host: ${masterFeHost}")

// Every remote path used by the following steps is built from DORIS_HOME of the test runner.
def dorisHome = System.getenv("DORIS_HOME")
logger.info("DORIS_HOME: ${dorisHome}")
assert dorisHome != null, "DORIS_HOME is not set"
78+
79+
// Runs `cmd` through `/bin/bash -c` with stderr merged into stdout and returns the captured output.
//   cmd     - shell command line to execute.
//   mustSuc - when true, a non-zero exit code raises an Exception; otherwise it is only logged.
//   returns - everything the command wrote to stdout/stderr.
// Throws Exception when the process cannot be started or read, or when it fails and mustSuc is true.
def executeCommand = { String cmd, Boolean mustSuc ->
    try {
        logger.info("Execute: ${cmd}")
        def proc = new ProcessBuilder("/bin/bash", "-c", cmd).redirectErrorStream(true).start()
        // Drain the output BEFORE waiting for exit: a child that writes more than the pipe
        // buffer holds would otherwise block on write while we block in waitFor().
        def output = proc.text
        int exitcode = proc.waitFor()
        if (exitcode != 0) {
            logger.info("Exit code: ${exitcode}, output: ${output}")
            if (mustSuc == true) {
                throw new Exception("Command failed with exit code ${exitcode}: ${output}")
            }
        }
        return output
    } catch (IOException e) {
        // No timeout is involved here; report the I/O failure as such and keep the cause.
        logger.error("Execute failed: ${e.message}")
        throw new Exception("Execute failed: ${e.message}", e)
    }
}
97+
98+
// Despite the wording of the log line, this step only lists the backends and wipes their
// storage directories; the stop commands are issued further down in the suite.
logger.info("Getting BE information and stopping all BEs and FEs")
def backendRows = sql """ SHOW BACKENDS """
logger.info("BE result: ${backendRows}")

// Column 1 of every SHOW BACKENDS row is kept as that backend's host.
def beHosts = backendRows.collect { row -> row[1] }
logger.info("BE hosts: ${beHosts}")

logger.info("Cleaning storage directory on all BEs")
deleteRemotePathOnAllBE(username, "${dorisHome}/be/storage/*")
logger.info("Cleaned storage directory on all BEs")
114+
115+
// Stop the master FE over ssh, then remove everything under its doris-meta directory.
logger.info("Stopping master FE on ${masterFeHost} and cleaning doris-meta directory")
executeCommand("ssh -o StrictHostKeyChecking=no ${username}@${masterFeHost} \"${dorisHome}/fe/bin/stop_fe.sh\"", true)
logger.info("Master FE stopped successfully")

executeCommand("ssh -o StrictHostKeyChecking=no ${username}@${masterFeHost} \"rm -rf ${dorisHome}/fe/doris-meta/*\"", true)
logger.info("Cleaned doris-meta directory")

// Stop each backend in turn; a failing stop script aborts the suite (mustSuc = true).
beHosts.each { host ->
    logger.info("Stopping BE on ${host}")
    executeCommand("ssh -o StrictHostKeyChecking=no ${username}@${host} \"${dorisHome}/be/bin/stop_be.sh\"", true)
    logger.info("BE on ${host} stopped successfully")
}
130+
131+
logger.info("Creating snapshot_info.json")

// Describe the clone: source instance and snapshot, plus the id and name of the new instance.
def newInstanceId = instanceId + "_cloned"
def instanceName = "cloned_instance"
def snapshotInfoJson = """{"from_instance_id":"${instanceId}","from_snapshot_id":"${snapshotId}","instance_id":"${newInstanceId}","name":"${instanceName}","is_successor":true}"""
logger.info("Snapshot info JSON content: ${snapshotInfoJson}")

// The JSON is shipped base64-encoded so its double quotes do not have to be escaped
// inside the quoted remote command.
def encodedInfo = snapshotInfoJson.bytes.encodeBase64().toString()
def masterFeSsh = "ssh -o StrictHostKeyChecking=no ${username}@${masterFeHost}"
def remoteInfoPath = "${dorisHome}/fe/snapshot_info.json"

executeCommand("${masterFeSsh} \"echo '${encodedInfo}' | base64 -d > ${remoteInfoPath}\"", true)
logger.info("Created snapshot_info.json successfully")

// Read the file back so the log shows what actually landed on the FE host.
def writtenInfo = executeCommand("${masterFeSsh} \"cat ${remoteInfoPath}\"", true)
logger.info("Verified snapshot_info.json content: ${writtenInfo}")
149+
150+
// Start the master FE from its bin directory, pointing it at the snapshot description.
// NOTE(review): the file was written to ${dorisHome}/fe/snapshot_info.json while this command
// runs in fe/bin with a relative name; presumably start_fe.sh resolves it against the FE home - confirm.
logger.info("Starting FE with snapshot_info.json")
executeCommand("ssh -o StrictHostKeyChecking=no ${username}@${masterFeHost} \"cd ${dorisHome}/fe/bin && ./start_fe.sh --daemon --cluster_snapshot snapshot_info.json\"", true)
logger.info("Master FE started successfully with cluster snapshot")

// Fixed 60 s pause before reconnecting; no readiness probe is performed.
logger.info("Waiting for FE to be ready...")
Thread.sleep(60000)

context.reconnectFe()

logger.info("Starting all BEs")
beHosts.each { host ->
    logger.info("Starting BE on ${host}")
    executeCommand("ssh -o StrictHostKeyChecking=no ${username}@${host} \"cd ${dorisHome}/be/bin && ./start_be.sh --daemon\"", true)
    logger.info("BE on ${host} started successfully")
}

// Same fixed 60 s pause for the backends.
logger.info("Waiting for BEs to be ready...")
Thread.sleep(60000)

logger.info("All steps completed successfully. FE and BEs have been restarted with cloned instance.")
172+
}

0 commit comments

Comments
 (0)