Skip to content

Commit 13fd8ad

Browse files
dsmileyCopilotepugh
authored
Test rolling upgrade of Solr using Docker and BATS (#3706)
Demonstrates moving between two versions of Solr. Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: dsmiley <377295+dsmiley@users.noreply.github.com> Co-authored-by: Eric Pugh <epugh@opensourceconnections.com>
1 parent 949b28e commit 13fd8ad

File tree

5 files changed

+221
-48
lines changed

5 files changed

+221
-48
lines changed

solr/packaging/build.gradle

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,14 @@ class BatsTask extends Exec {
327327
// Run bats with reporting into the build's test-output directory; extra
// bats debugging output (--verbose-run) is enabled when Gradle itself is
// run with --info or --debug.
protected void exec() {
  executable "$project.ext.nodeProjectDir/node_modules/bats/bin/bats"

  def cliArgs = logger.isInfoEnabled() ? ['--verbose-run'] : []
  cliArgs += ['-T', '--print-output-on-failure', '--report-formatter', 'junit', '--output', "$project.buildDir/test-output"]
  // Note: tests to run must be listed after all other arguments
  cliArgs += testFiles.empty ? [testDir] : testFiles
  setArgs(cliArgs)
  super.exec()
}
337340
}

solr/packaging/test/bats_helper.bash

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -97,41 +97,21 @@ collection_exists() {
9797
return 1
9898
}
9999

100-
# Wait for a collection to be queryable
101-
wait_for_collection() {
102-
local collection="$1"
103-
local timeout=${2:-180}
104-
local start_ts
105-
start_ts=$(date +%s)
106-
while true; do
107-
if curl -s -S -f "http://localhost:${SOLR_PORT}/solr/${collection}/select?q=*:*" | grep -q '"responseHeader"'; then
100+
# Utility function to retry a command until it succeeds or times out.
#
# Usage: wait_for [timeout_seconds] [interval_seconds] command [args...]
#   $1 - total time to keep retrying, in seconds (default 30)
#   $2 - pause between attempts, in seconds (default 1)
#   $@ - the command (and its arguments) to run on every attempt
# Returns 0 as soon as the command succeeds, 1 if the timeout elapses.
wait_for() {
  local timeout="${1:-30}"  # Default 30 seconds timeout
  local interval="${2:-1}"  # Default 1 second between retries
  shift 2                   # Remove timeout and interval from args
  local command=("$@")      # Remaining args are the command to execute

  # Declare and assign separately so a failing $(date) is not masked (SC2155).
  local end_time
  end_time=$(($(date +%s) + timeout))

  # Always make at least one attempt, even if timeout is 0 or the clock has
  # already passed end_time; check the deadline only after a failed attempt
  # so we never sleep one pointless extra interval past the deadline.
  while true; do
    if "${command[@]}"; then
      return 0
    fi
    if [ "$(date +%s)" -ge "$end_time" ]; then
      break
    fi
    sleep "$interval"
  done

  return 1 # Timeout reached
}

solr/packaging/test/test_extraction.bats

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,33 @@
1818

1919
load bats_helper
2020

21+
# Apply the ExtractingRequestHandler via Config API and print error body on failure.
#   $1 - collection name
# Reads SOLR_PORT and TIKA_PORT from the environment; diagnostics go to fd 3
# (bats' channel for messages that bypass output capture).
apply_extract_handler() {
  local coll="$1"
  local payload="{\"add-requesthandler\":{\"name\":\"/update/extract\",\"class\":\"org.apache.solr.handler.extraction.ExtractingRequestHandler\",\"tikaserver.url\":\"http://localhost:${TIKA_PORT}\",\"defaults\":{\"lowernames\":\"true\",\"captureAttr\":\"true\"}}}"
  local endpoint="http://localhost:${SOLR_PORT}/solr/${coll}/config"
  local reply status_code reply_body
  # Give the freshly-created collection a moment before hitting the Config API.
  sleep 5
  # Capture body and status code in a single request: -w appends the HTTP
  # code on its own line, so the last line is the code and the rest is body.
  reply=$(curl -s -S -w "\n%{http_code}" -X POST -H 'Content-type:application/json' -d "$payload" "$endpoint")
  status_code="${reply##*$'\n'}"
  reply_body="${reply%$'\n'*}"
  if [ "$status_code" != "200" ]; then
    echo "Config API error applying ExtractingRequestHandler to ${coll} (HTTP ${status_code}): ${reply_body}" >&3
    return 1
  fi
  return 0
}
39+
2140
# One-time file setup: start a Tika server container when Docker is usable;
# otherwise set DOCKER_UNAVAILABLE so individual tests can skip themselves.
setup_file() {
  if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
    export DOCKER_UNAVAILABLE=1
    echo "WARNING: Docker not available (CLI missing or daemon not running); Tika-dependent tests will be bypassed." >&3
    return 0
  fi
  # Derive the Tika port from SOLR_PORT so parallel test runs don't collide.
  export TIKA_PORT=$((SOLR_PORT+5))
  # Best-effort start: the container may already be running from a prior file.
  docker run --rm -p "${TIKA_PORT}:9998" --name bats_tika -d apache/tika:3.2.3.0-full >/dev/null 2>&1 || true
  echo "Tika Server started on port ${TIKA_PORT}" >&3
}
3150

@@ -51,8 +70,7 @@ teardown() {
5170
@test "using curl to extract a single pdf file" {
5271

5372
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
54-
echo "WARNING: Docker not available; bypassing test." >&3
55-
return 0
73+
skip "Docker is not available"
5674
fi
5775

5876
# Disable security manager to allow extraction
@@ -61,7 +79,7 @@ teardown() {
6179
solr start -Dsolr.modules=extraction
6280

6381
solr create -c gettingstarted -d _default
64-
wait_for_collection gettingstarted 30
82+
wait_for 30 3 curl -s -S -f "http://localhost:${SOLR_PORT}/solr/gettingstarted/select?q=*:*" -o /dev/null
6583
apply_extract_handler gettingstarted
6684

6785
curl "http://localhost:${SOLR_PORT}/solr/gettingstarted/update/extract?literal.id=doc1&commit=true" -F "myfile=@${SOLR_TIP}/example/exampledocs/solr-word.pdf"
@@ -73,8 +91,7 @@ teardown() {
7391
@test "using the bin/solr post tool to extract content from pdf" {
7492

7593
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
76-
echo "WARNING: Docker not available; bypassing test." >&3
77-
return 0
94+
skip "Docker is not available"
7895
fi
7996

8097
# Disable security manager to allow extraction
@@ -83,7 +100,7 @@ teardown() {
83100
solr start -Dsolr.modules=extraction
84101

85102
solr create -c content_extraction -d _default
86-
wait_for_collection content_extraction 30
103+
wait_for 30 3 curl -s -S -f "http://localhost:${SOLR_PORT}/solr/content_extraction/select?q=*:*" -o /dev/null
87104
apply_extract_handler content_extraction
88105

89106
# We filter to pdf to invoke the Extract handler.
@@ -99,8 +116,7 @@ teardown() {
99116
@test "using the bin/solr post tool to crawl web site" {
100117

101118
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
102-
echo "WARNING: Docker not available; bypassing test." >&3
103-
return 0
119+
skip "Docker is not available"
104120
fi
105121

106122
# Disable security manager to allow extraction
@@ -109,7 +125,7 @@ teardown() {
109125
solr start -Dsolr.modules=extraction
110126

111127
solr create -c website_extraction -d _default
112-
wait_for_collection website_extraction 30
128+
wait_for 30 3 curl -s -S -f "http://localhost:${SOLR_PORT}/solr/website_extraction/select?q=*:*" -o /dev/null
113129
apply_extract_handler website_extraction
114130

115131
# Change to --recursive 1 to crawl multiple pages, but may be too slow.
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env bats
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
load bats_helper
19+
20+
# Alternative images can be exercised by exporting the variables first, e.g.
#   export SOLR_BEGIN_IMAGE="apache/solr-nightly:9.9.0-slim"
#   ./gradlew iTest --tests test_docker_solrcloud.bats
: "${SOLR_BEGIN_IMAGE:=apache/solr-nightly:9.10.0-SNAPSHOT-slim}"
: "${SOLR_END_IMAGE:=apache/solr-nightly:10.0.0-SNAPSHOT-slim}"
25+
26+
# Per-test setup: verify Docker and both images are available (skip if not),
# record a start timestamp for log scoping, and prepare the artifact directory.
setup() {
  common_clean_setup

  # Pre-checks: everything below requires a working Docker daemon and images.
  if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
    skip "Docker is not available"
  fi
  docker pull "$SOLR_BEGIN_IMAGE" || skip "Docker image $SOLR_BEGIN_IMAGE is not available"
  docker pull "$SOLR_END_IMAGE" || skip "Docker image $SOLR_END_IMAGE is not available"

  # Record test start time so teardown can scope 'docker logs' on failure.
  TEST_STARTED_AT_ISO=$(date -Iseconds)
  export TEST_STARTED_AT_ISO

  # Persist artifacts under Gradle's test-output directory.
  export ARTIFACT_DIR="${TEST_OUTPUT_DIR}/docker"
  mkdir -p "$ARTIFACT_DIR"
}
45+
46+
teardown() {
47+
failed=$([[ -z "${BATS_TEST_COMPLETED:-}" ]] && [[ -z "${BATS_TEST_SKIPPED:-}" ]] && echo 1 || echo 0)
48+
if [[ "$failed" -eq 1 ]]; then
49+
echo "# Test failed - capturing Docker diagnostics" >&3
50+
echo "# === docker ps (summary) ===" >&3
51+
docker ps -a --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}\t{{.Ports}}' >&3 2>&3 || true
52+
fi
53+
54+
for container in solr-node1 solr-node2 solr-node3; do
55+
if docker ps -a --format '{{.Names}}' | grep -q "^${container}$" 2>/dev/null; then
56+
if [[ "$failed" -eq 1 ]]; then
57+
echo "# === Docker logs for $container ===" >&3
58+
docker logs --timestamps --since "$TEST_STARTED_AT_ISO" "$container" >&3 2>&3 || echo "# Failed to get logs for $container" >&3
59+
echo "# === Docker inspect for $container ===" >&3
60+
docker inspect "$container" | jq '.[] | {Name: .Name, State: .State, Ports: .NetworkSettings.Ports}' >&3 2>&3 || true
61+
fi
62+
# Persist artifacts
63+
docker logs --timestamps "$container" >"$ARTIFACT_DIR/${container}.log" 2>&1 || true
64+
docker inspect "$container" >"$ARTIFACT_DIR/${container}.inspect.json" 2>&1 || true
65+
docker exec "$container" ps aux >"$ARTIFACT_DIR/${container}.ps.txt" 2>&1 || true
66+
fi
67+
done
68+
69+
echo "# Docker artifacts saved to: $ARTIFACT_DIR" >&3
70+
71+
docker stop solr-node1 solr-node2 solr-node3 2>/dev/null || true
72+
docker rm solr-node1 solr-node2 solr-node3 2>/dev/null || true
73+
docker volume rm solr-data1 solr-data2 solr-data3 2>/dev/null || true
74+
docker network rm solrcloud-test 2>/dev/null || true
75+
}
76+
77+
# End-to-end rolling upgrade: start a 3-node SolrCloud on SOLR_BEGIN_IMAGE,
# create and populate a collection, then replace each node with SOLR_END_IMAGE
# one at a time (3, 2, 1) and verify the cluster stays healthy throughout.
#
# Fixes vs. previous revision: stray assert_success calls that had no
# preceding 'run' were removed (bats-assert reads $status, which 'run' sets;
# without it they check a stale/unset value — and bats already fails the test
# when a bare command exits non-zero).
@test "Docker SolrCloud rolling upgrade" {
  # Networking & volumes
  docker network create solrcloud-test
  docker volume create solr-data1
  docker volume create solr-data2
  docker volume create solr-data3

  echo "Starting solr-node1 with embedded ZooKeeper"
  docker run --name solr-node1 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data1:/var/solr \
    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node1 -p 8983
  docker exec solr-node1 solr assert --started http://solr-node1:8983 --timeout 10000

  # start next 2 in parallel

  echo "Starting solr-node2 connected to first node's ZooKeeper"
  docker run --name solr-node2 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data2:/var/solr \
    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node2 -p 8984 -z solr-node1:9983

  echo "Starting solr-node3 connected to first node's ZooKeeper"
  docker run --name solr-node3 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data3:/var/solr \
    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node3 -p 8985 -z solr-node1:9983

  docker exec solr-node2 solr assert --started http://solr-node2:8984 --timeout 30000
  docker exec solr-node3 solr assert --started http://solr-node3:8985 --timeout 30000

  echo "Creating a Collection"
  docker exec --user=solr solr-node1 solr create -c test-collection -n techproducts --shards 3

  echo "Checking collection health"
  wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection

  echo "Add some sample data"
  docker exec --user=solr solr-node1 solr post -c test-collection example/exampledocs/mem.xml

  # Begin rolling upgrade - upgrade node 3 first (reverse order: 3, 2, 1)
  echo "Starting rolling upgrade - upgrading node 3"
  docker stop solr-node3
  docker rm solr-node3
  docker run --name solr-node3 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data3:/var/solr \
    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node3 -p 8985 -z solr-node1:9983
  docker exec solr-node3 solr assert --started http://solr-node3:8985 --timeout 30000

  # Upgrade node 2 second
  echo "Upgrading node 2"
  docker stop solr-node2
  docker rm solr-node2
  docker run --name solr-node2 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data2:/var/solr \
    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node2 -p 8984 -z solr-node1:9983
  docker exec solr-node2 solr assert --started http://solr-node2:8984 --timeout 30000

  echo "Upgrading node 1 (ZK node)"
  docker stop solr-node1
  docker rm solr-node1
  # NOTE(review): node1 restarts without -c or -z; presumably the end image
  # (10.x) defaults to cloud mode with embedded ZooKeeper — confirm.
  docker run --name solr-node1 -d \
    --network solrcloud-test \
    --memory=400m \
    --platform linux/amd64 \
    -v solr-data1:/var/solr \
    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node1 -p 8983
  docker exec solr-node1 solr assert --started http://solr-node1:8983 --timeout 30000

  # Final collection health check
  wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection

  echo "checking cluster has exactly 3 live nodes"
  run docker exec solr-node1 curl -s "http://solr-node1:8983/solr/admin/collections?action=CLUSTERSTATUS"
  assert_success

  # Declare and assign separately so jq's exit status is not masked (SC2155).
  local live_nodes_count
  live_nodes_count=$(echo "$output" | jq -r '.cluster.live_nodes | length')
  assert_equal "$live_nodes_count" "3"
}

solr/packaging/test/test_start_solr.bats

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ teardown() {
6060
# for start/stop/restart we parse the args separate from picking the command
6161
# which means you don't get an error message for passing a start arg, like --jvm-opts to a stop command.
6262

63+
# Pre-check: skip when the 'timeout' utility is missing (e.g. stock macOS).
# A bare 'timeout' invocation always exits non-zero with a usage error, so it
# cannot serve as the availability probe itself — probe with 'command -v'.
command -v timeout >/dev/null 2>&1 || skip "timeout utility is not available"
6365
# Set a timeout duration (in seconds)
6466
TIMEOUT_DURATION=2
6567

0 commit comments

Comments
 (0)