
Commit 0105721

Merge branch 'master' into CLOUD-3506
2 parents 2d5c98c + 5065b67 commit 0105721


11 files changed: +285 −25 lines


.github/workflows/main.yml

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+# This workflow will build a Java project with Maven
+# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
+
+name: JBoss EAP OpenShift Modules - Bats
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+env:
+  LANG: en_US.UTF-8
+
+jobs:
+  bats:
+    name: Bats Shell Tests
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install bats
+        run: sudo apt-get install bats
+      - name: Run Bats
+        shell: bash
+        run: |
+          rc=0
+          echo "TAP version 13"
+          for testName in `find ./ -name *.bats`;
+          do
+            echo ${testName};
+            bats --tap ${testName} || rc=$?
+          done
+          exit ${rc}
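
The "Run Bats" step discovers every *.bats file in the repository and keeps the worst exit code, so a single failing suite fails the whole job. The same loop can be reproduced locally before pushing; this is a minimal sketch, assuming bats is already installed and the script is run from the repository root (the find pattern is quoted here as a defensive tweak, while the workflow leaves it unquoted):

#!/bin/bash
# Local equivalent of the CI "Run Bats" step (assumes `bats` is on PATH).
rc=0
echo "TAP version 13"
for testName in $(find . -name '*.bats'); do
    echo "${testName}"
    bats --tap "${testName}" || rc=$?
done
exit ${rc}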
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+mavenRepo="$1"
+if [ -f "$mavenRepo/patches.xml" ]; then
+  echo "The maven repository has been patched, setting patches in galleon feature-pack."
+  patches=`cat "$mavenRepo/patches.xml" | sed ':a;N;$!ba;s/\n//g'`
+  sed -i "s|<!-- ##PATCHES## -->|$patches|" "${GALLEON_FP_PATH}/wildfly-user-feature-pack-build.xml"
+  echo "wildfly-user-feature-pack-build.xml content:"
+  cat "${GALLEON_FP_PATH}/wildfly-user-feature-pack-build.xml"
+fi
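
The sed program ':a;N;$!ba;s/\n//g' accumulates the whole of patches.xml in the pattern space before deleting the newlines, so the patch metadata becomes a single line that can be spliced over the <!-- ##PATCHES## --> placeholder with an ordinary substitution. A self-contained sketch of that flatten-and-inject pattern, using throwaway files instead of the image's real GALLEON_FP_PATH:

#!/bin/bash
# Demo of the flatten-and-inject technique used by the hook script above; file names are illustrative.
workdir=$(mktemp -d)
printf '<patch id="one"/>\n<patch id="two"/>\n' > "${workdir}/patches.xml"
printf '<build>\n  <!-- ##PATCHES## -->\n</build>\n' > "${workdir}/feature-pack-build.xml"

# Join all lines of patches.xml into one (same sed program as the hook script).
patches=$(cat "${workdir}/patches.xml" | sed ':a;N;$!ba;s/\n//g')

# Replace the placeholder comment with the flattened fragment and show the result.
sed -i "s|<!-- ##PATCHES## -->|$patches|" "${workdir}/feature-pack-build.xml"
cat "${workdir}/feature-pack-build.xml"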

jboss/container/eap/galleon/module.yaml

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,8 @@ envs:
     value: "19.0.0.Final"
   - name: GALLEON_DEFINITIONS
     value: /opt/jboss/container/eap/galleon/definitions
+  - name: GALLEON_MAVEN_REPO_HOOK_SCRIPT
+    value: /opt/jboss/container/eap/galleon/patching.sh
   - name: GALLEON_DEFAULT_SERVER
     value: /opt/jboss/container/eap/galleon/definitions/slim-default-server
   - name: GALLEON_DEFAULT_FAT_SERVER

os-eap-migration/added/launch/openshift-migrate-common.sh

Lines changed: 8 additions & 5 deletions
@@ -20,7 +20,7 @@ function runMigration() {

   echo "Running $JBOSS_IMAGE_NAME image, version $JBOSS_IMAGE_VERSION"

-  local txOptions="-Dcom.arjuna.ats.arjuna.common.RecoveryEnvironmentBean.recoveryBackoffPeriod=1 -Dcom.arjuna.ats.arjuna.common.RecoveryEnvironmentBean.periodicRecoveryPeriod=1 -Dcom.arjuna.ats.jta.common.JTAEnvironmentBean.orphanSafetyInterval=1"
+  local txOptions="-Djboss.node.name=${NODE_NAME} -Dcom.arjuna.ats.arjuna.common.RecoveryEnvironmentBean.recoveryBackoffPeriod=1 -Dcom.arjuna.ats.arjuna.common.RecoveryEnvironmentBean.periodicRecoveryPeriod=1 -Dcom.arjuna.ats.jta.common.JTAEnvironmentBean.orphanSafetyInterval=1"
   local terminatingFile="${JBOSS_HOME}/terminatingMigration"

   (runMigrationServer "$instanceDir" "${txOptions}") &
@@ -54,8 +54,11 @@ function runMigration() {

   if [ "${recoveryPort}" != "undefined" ] ; then
     local recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
-    # we may have > 1 jar, if that is the case we use the most recent one
+    # for runtime image the modules jar files are under $JBOSS_HOME/modules
     recoveryJars=$(find "${JBOSS_HOME}" -name \*.jar | xargs grep -l "${recoveryClass}")
+    # for builder image the modules jar files are under galleon maven repository
+    [ -z "${recoveryJars}" ] && recoveryJars=$(find "${GALLEON_LOCAL_MAVEN_REPO}" -name \*.jar | xargs grep -l "${recoveryClass}")
+    # we may have > 1 jar, if that is the case we use the most recent one
     recoveryJar=$(ls -Art $recoveryJars | tail -n 1)
     if [ -n "${recoveryJar}" ] ; then
       echo "$(date): Executing synchronous recovery scan for a first time"
@@ -73,11 +76,11 @@ function runMigration() {
     fi
   done

-  # -- checking if the pod log is clean from errors (only if function of the particular name exists, provided by the os-partition module)
+  # -- checking if the migration pod log is clean from errors (only if the function exists, provided by the os-eap-txnrecovery module)
   if [ $probeStatus -eq 0 ] && [ "$(type -t probePodLogForRecoveryErrors)" = 'function' ]; then
-    probePodLogForRecoveryErrors
+    probePodLogForRecoveryErrors "${MIGRATION_POD_NAME}" "${MIGRATION_POD_TIMESTAMP}"
     probeStatus=$?
-    [ $probeStatus -ne 0 ] && echo "The migration container log contains periodic recovery errors, check it for details."
+    [ $probeStatus -ne 0 ] && echo "The migration container log contains periodic recovery errors or cannot query API, check for details above."
   fi

   if [ $probeStatus -eq 0 ] ; then
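
The jar lookup now tries $JBOSS_HOME first and only falls back to the Galleon local Maven repository when nothing matched there, while still resolving multiple hits to the most recently modified jar. A hedged sketch of that two-stage search, with the directory defaults purely illustrative:

#!/bin/bash
# Sketch: find the jar containing a given class, preferring the runtime layout
# and falling back to a secondary repository when the first search is empty.
recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
primaryDir="${JBOSS_HOME:-/opt/server}"
fallbackDir="${GALLEON_LOCAL_MAVEN_REPO:-/tmp/galleon-repo}"

recoveryJars=$(find "${primaryDir}" -name '*.jar' 2>/dev/null | xargs grep -l "${recoveryClass}" 2>/dev/null)
[ -z "${recoveryJars}" ] && recoveryJars=$(find "${fallbackDir}" -name '*.jar' 2>/dev/null | xargs grep -l "${recoveryClass}" 2>/dev/null)

if [ -n "${recoveryJars}" ]; then
  # More than one match: keep the most recently modified jar.
  recoveryJar=$(ls -Art ${recoveryJars} | tail -n 1)
  echo "recovery jar: ${recoveryJar}"
else
  echo "no jar containing ${recoveryClass} was found"
fi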

os-eap-txnrecovery/bash/added/partitionPV.sh

Lines changed: 20 additions & 6 deletions
@@ -124,7 +124,7 @@ function startApplicationServer() {
     mkdir -p "${SERVER_DATA_DIR}"

     if [ ! -f "${SERVER_DATA_DIR}/../data_initialized" ]; then
-      init_data_dir ${SERVER_DATA_DIR}
+      init_data_dir ${SERVER_DATA_DIR}
       touch "${SERVER_DATA_DIR}/../data_initialized"
     fi
   fi
@@ -209,7 +209,9 @@ function migratePV() {

     (
       # 1.a.ii) run recovery until empty (including orphan checks and empty object store hierarchy deletion)
+      MIGRATION_POD_NAME=${recoveryPodName}
       MIGRATION_POD_TIMESTAMP=$(getPodLogTimestamp) # investigating on current pod timestamp
+      [ $? -ne 0 ] && log_warning "[`date`] Can't get log from the transaction migration pod '${MIGRATION_POD_NAME}', skipping cleanup for ${applicationPodName}" && continue
       SERVER_DATA_DIR="${applicationPodDir}/serverData"
       NODE_NAME=$(truncate_jboss_node_name "${applicationPodName}") runMigration "${SERVER_DATA_DIR}" &

@@ -336,7 +338,8 @@ function removeRecoveryMarker() {
 # parameters:
 # - place where pod data directories are saved (podsDir)
 function recoveryPodsGarbageCollection() {
-  local livingPods=($($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q pods_living -f list_space ${DEBUG_QUERY_API_PARAM}))
+  local livingPods
+  livingPods=($($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q pods_living -f list_space ${DEBUG_QUERY_API_PARAM}))
   if [ $? -ne 0 ]; then # fail to connect to openshift api
     log_warning "[`date`] Can't get list of living pods. Can't do recovery marker garbage collection."
     return 1
@@ -371,7 +374,14 @@ function getPodLogTimestamp() {
   init_pod_name
   local podNameToProbe=${1:-$POD_NAME}

-  local logOutput=$($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q log --pod ${podNameToProbe} --tailline 1 ${DEBUG_QUERY_API_PARAM})
+  local logOutput
+  logOutput=$($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q log --pod ${podNameToProbe} --tailline 1 ${DEBUG_QUERY_API_PARAM})
+
+  if [ $? -ne 0 ]; then
+    log_warning "[`date`] Cannot contact OpenShift API to get log for pod ${podNameToProbe} while searching for the log timestamp"
+    return 1
+  fi
+
   # only one, last line of the log, is returned, taking the start which is timestamp
   echo $logOutput | sed 's/ .*$//'
 }
@@ -381,13 +391,17 @@ function getPodLogTimestamp() {
 # - pod name (optional)
 function probePodLogForRecoveryErrors() {
   init_pod_name
+  local podNameToProbe=${1:-$POD_NAME}
   local sinceTimestampParam=''
-  local sinceTimestamp=${1:-$MIGRATION_POD_TIMESTAMP}
+  local sinceTimestamp=${2:-$MIGRATION_POD_TIMESTAMP}
   [ "x$sinceTimestamp" != "x" ] && sinceTimestampParam="--sincetime ${sinceTimestamp}"
-  local podNameToProbe=${2:-$POD_NAME}

-  local logOutput=$($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q log --pod ${podNameToProbe} ${sinceTimestampParam} ${DEBUG_QUERY_API_PARAM})
+  local logOutput
+  # even for debug it's too verbose to print the listing of the log
+  [ "x${SCRIPT_DEBUG}" = "xtrue" ] && set +x
+  logOutput=$($(dirname ${BASH_SOURCE[0]})/queryosapi.py -q log --pod ${podNameToProbe} ${sinceTimestampParam} ${DEBUG_QUERY_API_PARAM})
   local probeStatus=$?
+  [ "x${SCRIPT_DEBUG}" = "xtrue" ] && set -x

   if [ $probeStatus -ne 0 ]; then
     log_warning "[`date`] Cannot contact OpenShift API to get log for pod ${podNameToProbe}"
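
Several of these changes split `local logOutput` (or `local livingPods`) from the assignment that follows it. The reason is that `local var=$(command)` always leaves $? set to the exit status of the `local` builtin, which is 0, so a failure of the command substitution is silently lost; declaring first and assigning separately preserves the status for the `if [ $? -ne 0 ]` checks. A short standalone sketch demonstrating the difference:

#!/bin/bash
# Why `local` declarations are separated from command-substitution assignments.
demo_masked() {
    local out=$(false)   # $? reflects `local`, not `false`
    echo "masked status: $?"
}
demo_preserved() {
    local out
    out=$(false)         # $? reflects the command substitution
    echo "preserved status: $?"
}
demo_masked      # prints: masked status: 0
demo_preserved   # prints: preserved status: 1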

os-eap-txnrecovery/bash/added/queryosapi.py

Lines changed: 20 additions & 13 deletions
@@ -19,6 +19,7 @@
 import json
 import logging
 import osquery
+import sys, traceback

 from enum import Enum

@@ -106,20 +107,26 @@ def getLog(podName, sinceTime, tailLine):

 logger.debug("Starting query openshift api with args: %s", args)

-if args.query == QueryType.PODS:
-    queryResult = getPods()
-elif args.query == QueryType.PODS_LIVING:
-    queryResult = getLivingPods()
-elif args.query == QueryType.LOG:
-    if args.pod is None:
-        logger.critical('query of type "--query log" requires one argument to be an existing pod name')
+try:
+    if args.query == QueryType.PODS:
+        queryResult = getPods()
+    elif args.query == QueryType.PODS_LIVING:
+        queryResult = getLivingPods()
+    elif args.query == QueryType.LOG:
+        if args.pod is None:
+            logger.critical('query of type "--query log" requires one argument to be an existing pod name')
+            exit(1)
+        podName = args.pod
+        sinceTime = args.sincetime
+        tailLine = args.tailline
+        queryResult = getLog(podName, sinceTime, tailLine)
+    else:
+        logger.critical('No handler for query type %s', args.query)
         exit(1)
-    podName = args.pod
-    sinceTime = args.sincetime
-    tailLine = args.tailline
-    queryResult = getLog(podName, sinceTime, tailLine)
-else:
-    logger.critical('No handler for query type %s', args.query)
+except:
+    etype, value, tb = sys.exc_info()
+    logger.critical("Error while request was processed: %s : %s", etype, value)
+    logger.debug('Exception stacktrace:\n%s', ''.join(traceback.format_exception(etype, value, tb, None)))
     exit(1)

 if args.format == OutputFormat.LIST_SPACE:
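
Wrapping the query dispatch in try/except means any unexpected exception, not just the handled argument errors, now ends the process with a non-zero exit code instead of an unhandled traceback. The shell callers in partitionPV.sh rely on exactly that; a hedged sketch of the caller-side pattern, with the pod name purely illustrative:

#!/bin/bash
# Treat any non-zero exit from queryosapi.py as "API unreachable or query failed"
# and bail out instead of acting on empty output. Paths mirror partitionPV.sh usage.
logOutput=$("$(dirname "${BASH_SOURCE[0]}")/queryosapi.py" -q log --pod example-pod --tailline 1)
if [ $? -ne 0 ]; then
    echo "$(date): cannot query the OpenShift API for pod 'example-pod', skipping" >&2
    exit 1
fi
echo "last log line: ${logOutput}"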
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+export JBOSS_HOME="${BATS_TMPDIR}/jboss_home"
+# Prepare the directories and scripts to the right places
+mkdir -p "${JBOSS_HOME}/bin/launch"
+cp "${BATS_TEST_DIRNAME}/../added/partitionPV.sh" "${JBOSS_HOME}/bin/launch/"
+cp "${BATS_TEST_DIRNAME}/../../../test-common/launch-common.sh" "${JBOSS_HOME}/bin/launch/"
+cp "${BATS_TEST_DIRNAME}/../../../test-common/logging.sh" "${JBOSS_HOME}/bin/launch/"
+# test files
+cp "${BATS_TEST_DIRNAME}/test_queryosapi.py" "${JBOSS_HOME}/bin/launch/queryosapi.py"
+chmod ugo+x "${JBOSS_HOME}/bin/launch/queryosapi.py"
+
+# Set up the environment variables and load dependencies
+source "${JBOSS_HOME}/bin/launch/launch-common.sh"
+source "${JBOSS_HOME}/bin/launch/logging.sh"
+# Sourcing the script for testing
+source "${JBOSS_HOME}/bin/launch/partitionPV.sh"
+
+# places to store the files needed for testing
+export SERVER_TEMP_DIR="${BATS_TMPDIR}/server_temp_dir"
+export SERVER_RUNNING_MARKER_FILENAME="server.was.started"
+
+setup() {
+  rm -rf "${SERVER_TEMP_DIR}"
+}
+
+# simulating the startup of the server; normally run by 'openshift-launch.sh' script
+function runServer() {
+  # expecting the first parameter to be set
+  [ "x$1" = "x" ] && echo "The first parameter of meaning instanceDir has to be defined" && return 1
+  touch "${1}/${SERVER_RUNNING_MARKER_FILENAME}"
+  (
+    echo "${I}: Running server with PID $$"
+  )&
+}
+
+# function definition for testing purposes
+function init_data_dir() {
+  echo "init_data_dir executed"
+}
Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
+#!/usr/bin/env bats
+
+load common
+
+@test "arrContains: find existing item" {
+  ARRAY_TO_TEST=( itemone itemtwo itemthree )
+  run arrContains itemtwo "${ARRAY_TO_TEST[@]}"
+  [ "$status" -eq 0 ]
+}
+
+@test "arrContains: fail with non existing item" {
+  ARRAY_TO_TEST=( itemone itemtwo itemthree )
+  run arrContains itemfour "${ARRAY_TO_TEST[@]}"
+  [ "$status" -eq 1 ]
+}
+
+@test "init_pod_name: NODE_NAME specified" {
+  NODE_NAME="nodename"
+  init_pod_name
+  [ "$?" -eq 0 ]
+  [ "$POD_NAME" = "$NODE_NAME" ]
+}
+
+@test "init_pod_name: JBOSS_NODE_NAME overrides the pod name value" {
+  NODE_NAME="nodename"
+  JBOSS_NODE_NAME="jbossnodename"
+  init_pod_name
+  [ "$?" -eq 0 ]
+  [ "$POD_NAME" = "$JBOSS_NODE_NAME" ]
+}
+
+@test "init_pod_name: uses the host name or docker container uuid" {
+  init_pod_name
+  [ "$?" -eq 0 ]
+  [ "$POD_NAME" = "$HOSTNAME" ] || [ "$POD_NAME" = "${container_uuid}" ]
+}
+
+@test "truncate_jboss_node_name: long name has to be truncated up to 23 characters not started with -" {
+  CHARACTERS_22_LONG="ABCDEFGHIJKLMNOPQRSTUV"
+  run truncate_jboss_node_name "moreCharatersHere#-${CHARACTERS_22_LONG}"
+  [ "$status" -eq 0 ]
+  [ "$output" = "${CHARACTERS_22_LONG}" ]
+}
+
+# Server is started with data directory which is passed to the function in parameter
+@test "startApplicationServer: simple start, no split data dir" {
+  export IS_TX_SQL_BACKEND=false
+  export IS_SPLIT_DATA_DEFINED=false
+
+  SERVER_DATA_DIR="$SERVER_TEMP_DIR"
+  mkdir -p "$SERVER_DATA_DIR"
+
+  POD_NAME="node-name-1"
+  run startApplicationServer
+  [ "$status" -eq 0 ]
+  [ -f "${SERVER_DATA_DIR}/${SERVER_RUNNING_MARKER_FILENAME}" ]
+}
+
+# Definition of the split data expects that multiple servers share the same directory for their runtime data
+# The startup script has to create separate place for each server
+@test "startApplicationServer: split data dir defined, no recovery" {
+  IS_TX_SQL_BACKEND=false
+  IS_SPLIT_DATA_DEFINED=true
+  POD_NAME="node-name-split-data"
+
+  run startApplicationServer "${SERVER_TEMP_DIR}"
+  [ "$status" -eq 0 ]
+  [ -f "${SERVER_TEMP_DIR}/${POD_NAME}/serverData/${SERVER_RUNNING_MARKER_FILENAME}" ]
+  [ -f "${SERVER_TEMP_DIR}/${POD_NAME}/data_initialized" ]
+}
+
+# The test simulates the recovery in progress
+# the recovery processing creates a marker file with the name
+# the test creates the marker file with the name of the pod which is about to be started
+# the application server is started only after the recovery marker is removed
+@test "startApplicationServer: split data dir defined, recovery in progress" {
+  IS_TX_SQL_BACKEND=false
+  IS_SPLIT_DATA_DEFINED=true
+  POD_NAME="node-name-with-recovery"
+  mkdir -p "${SERVER_TEMP_DIR}"
+  local recoveryMarkerFileName="${SERVER_TEMP_DIR}/${POD_NAME}-RECOVERY-bats.testing"
+
+  touch "$recoveryMarkerFileName"
+
+  startApplicationServer "${SERVER_TEMP_DIR}" &
+  local startApplicationServerPid=$!
+  sleep "0.01"
+
+  [ ! -f "${SERVER_TEMP_DIR}/${POD_NAME}/serverData/${SERVER_RUNNING_MARKER_FILENAME}" ]
+  rm -f "$recoveryMarkerFileName"
+
+  wait $startApplicationServerPid
+  echo ":: $(kill -0 $testPid)"
+
+  [ -f "${SERVER_TEMP_DIR}/${POD_NAME}/serverData/${SERVER_RUNNING_MARKER_FILENAME}" ]
+  [ -f "${SERVER_TEMP_DIR}/${POD_NAME}/data_initialized" ]
+}
+
+@test "recovery marker creation and deletion" {
+  IS_TX_SQL_BACKEND=false
+  IS_SPLIT_DATA_DEFINED=true
+  POD_NAME="recovery-in-progress"
+  mkdir -p "${SERVER_TEMP_DIR}"
+  podsDir="${SERVER_TEMP_DIR}"
+
+  if isRecoveryInProgress "${SERVER_TEMP_DIR}"; then
+    # recovery in progress expected to fail
+    return 1
+  fi
+
+  createRecoveryMarker "${SERVER_TEMP_DIR}" "${POD_NAME}" "recovery-name"
+  isRecoveryInProgress "${SERVER_TEMP_DIR}"
+  removeRecoveryMarker "${SERVER_TEMP_DIR}" "${POD_NAME}" "recovery-name"
+
+  if isRecoveryInProgress "${SERVER_TEMP_DIR}"; then
+    # recovery in progress #2 expected to fail
+    return 1
+  fi
+}
+
+@test "recovery garbage collection cleanup" {
+  IS_TX_SQL_BACKEND=false
+  IS_SPLIT_DATA_DEFINED=true
+  POD_NAME="recovery-garbage-collection"
+  local recoveryMarkerName="recovery-name"
+  mkdir -p "${SERVER_TEMP_DIR}"
+  podsDir="${SERVER_TEMP_DIR}"
+
+  run recoveryPodsGarbageCollection "${SERVER_TEMP_DIR}"
+  [ "$status" -eq 0 ]
+
+  createRecoveryMarker "${SERVER_TEMP_DIR}" "${POD_NAME}" $recoveryMarkerName
+  [ -f "${SERVER_TEMP_DIR}/${POD_NAME}-RECOVERY-${recoveryMarkerName}" ]
+
+  run recoveryPodsGarbageCollection "${SERVER_TEMP_DIR}"
+  [ "$status" -eq 0 ]
+  [ ! -f "${SERVER_TEMP_DIR}/${POD_NAME}-RECOVERY-${recoveryMarkerName}" ]
+}
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+#!/bin/python
