Skip to content

Commit e74c9f4

Browse files
gustavolira and claude committed
feat(ci): add /test rerun-failed-tests command
Add a new CI command that re-executes only the tests that failed in the previous e2e-ocp-helm run, optimizing time and resources. New files: - retest-failed-utils.sh: Utility functions for fetching JUnit artifacts from GCS, parsing failed tests, and running specific test files - jobs/ocp-rerun-failed-tests.sh: Main job handler that orchestrates fetching previous results, deploying only needed namespaces, and running failed tests The command: - Fetches JUnit results from the previous e2e-ocp-helm run via GCS - Parses which tests failed for showcase and showcase-rbac namespaces - Deploys only the namespaces that had failures - Runs only the tests that previously failed using Playwright - Returns success if no previous run exists or no tests failed Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 4bd1ff1 commit e74c9f4

File tree

3 files changed

+726
-0
lines changed

3 files changed

+726
-0
lines changed
Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
#!/bin/bash
2+
#
3+
# Job handler for re-running only the failed tests from a previous e2e-ocp-helm execution.
4+
#
5+
# This job:
6+
# 1. Fetches JUnit results from the previous e2e-ocp-helm run
7+
# 2. Parses which tests failed for each namespace (showcase, showcase-rbac)
8+
# 3. Deploys only the namespaces that had failures
9+
# 4. Runs only the tests that previously failed
10+
#
11+
12+
# shellcheck source=.ibm/pipelines/lib/log.sh
13+
source "${DIR}/lib/log.sh"
14+
# shellcheck source=.ibm/pipelines/playwright-projects.sh
15+
source "${DIR}/playwright-projects.sh"
16+
# shellcheck source=.ibm/pipelines/retest-failed-utils.sh
17+
source "${DIR}/retest-failed-utils.sh"
18+
19+
#######################################
# Main handler for the rerun-failed-tests job.
#
# Looks up the previous build of the target job for the current PR, downloads
# the JUnit report of each namespace, and redeploys/retests only the
# namespaces that still have failed test files.
#
# Globals (read):
#   PULL_NUMBER, REPO_OWNER, REPO_NAME, RERUN_TARGET_JOB, RELEASE_NAME,
#   RELEASE_NAME_RBAC, PW_PROJECT_SHOWCASE, PW_PROJECT_SHOWCASE_RBAC,
#   TMPDIR (optional, defaults to /tmp)
# Globals (exported):
#   NAME_SPACE, NAME_SPACE_RBAC, NAME_SPACE_POSTGRES_DB,
#   K8S_CLUSTER_ROUTER_BASE
# Returns:
#   0 when there is nothing to rerun or every rerun passed, 1 otherwise.
#######################################
handle_ocp_rerun_failed_tests() {
  export NAME_SPACE="${NAME_SPACE:-showcase}"
  export NAME_SPACE_RBAC="${NAME_SPACE_RBAC:-showcase-rbac}"
  export NAME_SPACE_POSTGRES_DB="${NAME_SPACE_POSTGRES_DB:-postgress-external-db}"

  log::section "Rerun Failed Tests Job"

  # Get PR information (presumably populates PULL_NUMBER and friends; the
  # helper is defined outside this file).
  get_pr_info

  if [[ -z "${PULL_NUMBER:-}" ]]; then
    log::error "PULL_NUMBER is not set. Cannot determine which PR to fetch results for."
    log::info "This job should only run in a PR context."
    return 1
  fi

  # Login to OpenShift cluster
  log::info "Logging into OpenShift cluster..."
  oc_login
  log::info "OCP version: $(oc version)"

  # Cluster router base = console route host with its first DNS label removed.
  K8S_CLUSTER_ROUTER_BASE=$(oc get route console -n openshift-console -o=jsonpath='{.spec.host}' | sed 's/^[^.]*\.//')
  export K8S_CLUSTER_ROUTER_BASE

  # Private, unpredictable scratch directory for the downloaded JUnit files.
  # It is removed on every return path below.
  local temp_dir
  if ! temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/rerun-failed-tests.XXXXXX"); then
    log::error "Could not create a temporary directory for JUnit files."
    return 1
  fi

  # Get previous build ID
  local build_id
  build_id=$(get_previous_failed_build_id "${REPO_OWNER}" "${REPO_NAME}" "${PULL_NUMBER}")

  if [[ -z "${build_id}" ]]; then
    log::warn "No previous build found for e2e-ocp-helm job."
    log::info "Nothing to rerun. Exiting with success."
    rm -rf -- "${temp_dir}"
    return 0
  fi

  log::info "Previous build ID: ${build_id}"

  # Fetch and parse JUnit results for each namespace
  local showcase_junit="${temp_dir}/showcase-junit.xml"
  local showcase_rbac_junit="${temp_dir}/showcase-rbac-junit.xml"

  local showcase_url
  showcase_url=$(build_previous_run_artifact_url "${REPO_OWNER}" "${REPO_NAME}" "${PULL_NUMBER}" \
    "${RERUN_TARGET_JOB}" "${build_id}" "${NAME_SPACE}")

  local showcase_rbac_url
  showcase_rbac_url=$(build_previous_run_artifact_url "${REPO_OWNER}" "${REPO_NAME}" "${PULL_NUMBER}" \
    "${RERUN_TARGET_JOB}" "${build_id}" "${NAME_SPACE_RBAC}")

  # Fetch JUnit results
  local has_showcase_results=false
  local has_rbac_results=false

  if fetch_previous_junit_results "${showcase_url}" "${showcase_junit}"; then
    has_showcase_results=true
  fi

  if fetch_previous_junit_results "${showcase_rbac_url}" "${showcase_rbac_junit}"; then
    has_rbac_results=true
  fi

  if [[ "${has_showcase_results}" == "false" && "${has_rbac_results}" == "false" ]]; then
    log::warn "Could not fetch JUnit results from previous run."
    log::info "The previous run may not have completed or artifacts may have expired."
    log::info "Nothing to rerun. Exiting with success."
    rm -rf -- "${temp_dir}"
    return 0
  fi

  # Parse failed tests for each namespace
  local -a showcase_failed_tests=()
  local -a rbac_failed_tests=()

  if [[ "${has_showcase_results}" == "true" ]]; then
    local showcase_failures
    showcase_failures=$(get_failed_test_count "${showcase_junit}")
    log::info "Showcase namespace: ${showcase_failures} failures"

    if [[ "${showcase_failures}" -gt 0 ]]; then
      mapfile -t showcase_failed_tests < <(parse_failed_tests_from_junit "${showcase_junit}")
      # Filter to only existing test files
      mapfile -t showcase_failed_tests < <(filter_existing_test_files "${showcase_failed_tests[@]}")
    fi
  fi

  if [[ "${has_rbac_results}" == "true" ]]; then
    local rbac_failures
    rbac_failures=$(get_failed_test_count "${showcase_rbac_junit}")
    log::info "Showcase-RBAC namespace: ${rbac_failures} failures"

    if [[ "${rbac_failures}" -gt 0 ]]; then
      mapfile -t rbac_failed_tests < <(parse_failed_tests_from_junit "${showcase_rbac_junit}")
      # Filter to only existing test files
      mapfile -t rbac_failed_tests < <(filter_existing_test_files "${rbac_failed_tests[@]}")
    fi
  fi

  # The JUnit files are fully consumed at this point.
  rm -rf -- "${temp_dir}"

  # Check if there are any tests to rerun
  if [[ ${#showcase_failed_tests[@]} -eq 0 && ${#rbac_failed_tests[@]} -eq 0 ]]; then
    log::success "No failed tests found in previous run!"
    log::info "Either all tests passed or the failed test files no longer exist."
    return 0
  fi

  log::section "Tests to Rerun"
  log::info "Showcase failed tests: ${#showcase_failed_tests[@]}"
  log::info "RBAC failed tests: ${#rbac_failed_tests[@]}"

  # Setup cluster (operators, etc.) - needed for deployment
  cluster_setup_ocp_helm

  # Deploy and test based on which namespaces had failures.
  # The arrays are handed over BY NAME: the callees bind them with `local -n`,
  # and a nameref needs the bare variable name (not "name[@]") for
  # "${ref[@]}" / "${#ref[@]}" to see the elements.
  local overall_result=0

  if [[ ${#showcase_failed_tests[@]} -gt 0 ]]; then
    log::section "Rerunning Showcase Failed Tests"
    deploy_and_retest_namespace \
      "${NAME_SPACE}" \
      "${RELEASE_NAME}" \
      "${PW_PROJECT_SHOWCASE}" \
      showcase_failed_tests || overall_result=1
  fi

  if [[ ${#rbac_failed_tests[@]} -gt 0 ]]; then
    log::section "Rerunning RBAC Failed Tests"
    deploy_and_retest_namespace_rbac \
      "${NAME_SPACE_RBAC}" \
      "${RELEASE_NAME_RBAC}" \
      "${PW_PROJECT_SHOWCASE_RBAC}" \
      rbac_failed_tests || overall_result=1
  fi

  # Report final result
  if [[ ${overall_result} -eq 0 ]]; then
    log::success "All rerun tests passed!"
  else
    log::error "Some rerun tests still failed."
    save_overall_result 1
  fi

  return "${overall_result}"
}
171+
172+
#######################################
# Deploy showcase namespace and retest failed tests
# Globals (read):
#   DIR, K8S_CLUSTER_ROUTER_BASE, QUAY_REPO, TAG_NAME, ARTIFACT_DIR,
#   HELM_CHART_VALUE_FILE_NAME, HELM_CHART_URL, CHART_VERSION
# Arguments:
#   namespace: The namespace to deploy to
#   release_name: Helm release name
#   playwright_project: Playwright project name
#   failed_tests_ref: Name of the array holding the failed test files
#     (a legacy trailing "[@]" is accepted and ignored)
# Returns:
#   The status of run_failed_tests_and_report when the instance came up,
#   1 when the deployment did not become ready or DIR is not accessible.
#######################################
deploy_and_retest_namespace() {
  local namespace="${1}"
  local release_name="${2}"
  local playwright_project="${3}"
  # A nameref must target the bare array name; bound to "name[@]" it makes
  # "${ref[@]}" expand to nothing. Strip the suffix so both forms work.
  local failed_tests_name=${4}
  failed_tests_name=${failed_tests_name%"[@]"}
  # shellcheck disable=SC2034 # nameref is consumed by run_failed_tests_and_report
  local -n failed_tests="${failed_tests_name}"

  log::info "Deploying to namespace: ${namespace}"

  # Configure namespace
  configure_namespace "${namespace}"
  deploy_redis_cache "${namespace}"

  if ! cd "${DIR}"; then
    log::error "Cannot change directory to ${DIR}"
    return 1
  fi

  # Route URL of the instance; used both for templating and for the tests.
  local url="https://${release_name}-developer-hub-${namespace}.${K8S_CLUSTER_ROUTER_BASE}"
  apply_yaml_files "${DIR}" "${namespace}" "${url}"

  log::info "Deploying image from repository: ${QUAY_REPO}, TAG_NAME: ${TAG_NAME}"

  # Use the same deployment logic as PR jobs (skip orchestrator)
  local merged_pr_value_file="/tmp/merged-values_showcase_PR.yaml"
  yq_merge_value_files "merge" "${DIR}/value_files/${HELM_CHART_VALUE_FILE_NAME}" "${DIR}/value_files/diff-values_showcase_PR.yaml" "${merged_pr_value_file}"
  disable_orchestrator_plugins_in_values "${merged_pr_value_file}"

  # Keep a copy of the effective values for debugging; best effort only.
  mkdir -p "${ARTIFACT_DIR}/${namespace}"
  cp -a "${merged_pr_value_file}" "${ARTIFACT_DIR}/${namespace}/" || true

  # get_image_helm_set_params emits several words that must be split into
  # separate helm arguments, hence the unquoted substitution.
  # shellcheck disable=SC2046
  helm upgrade -i "${release_name}" -n "${namespace}" \
    "${HELM_CHART_URL}" --version "${CHART_VERSION}" \
    -f "${merged_pr_value_file}" \
    --set global.clusterRouterBase="${K8S_CLUSTER_ROUTER_BASE}" \
    $(get_image_helm_set_params)

  deploy_test_backstage_customization_provider "${namespace}"

  # Wait for deployment and run failed tests
  local result=0
  if check_backstage_running "${release_name}" "${namespace}" "${url}"; then
    log::info "Backstage is running. Running failed tests..."
    # Forward the array by (bare) name so the callee's nameref resolves it.
    run_failed_tests_and_report "${release_name}" "${namespace}" "${playwright_project}" "${url}" failed_tests || result=$?
  else
    log::error "Backstage deployment failed in ${namespace}"
    result=1
  fi

  # Pod logs are collected on success and on failure alike.
  save_all_pod_logs "${namespace}"
  return "${result}"
}
232+
233+
#######################################
# Deploy showcase-rbac namespace and retest failed tests
# Globals (read):
#   DIR, NAME_SPACE_POSTGRES_DB, K8S_CLUSTER_ROUTER_BASE, QUAY_REPO,
#   TAG_NAME, ARTIFACT_DIR, HELM_CHART_RBAC_VALUE_FILE_NAME, HELM_CHART_URL,
#   CHART_VERSION
# Arguments:
#   namespace: The namespace to deploy to
#   release_name: Helm release name
#   playwright_project: Playwright project name
#   failed_tests_ref: Name of the array holding the failed test files
#     (a legacy trailing "[@]" is accepted and ignored)
# Returns:
#   The status of run_failed_tests_and_report when the instance came up,
#   1 when the deployment did not become ready or DIR is not accessible.
#######################################
deploy_and_retest_namespace_rbac() {
  local namespace="${1}"
  local release_name="${2}"
  local playwright_project="${3}"
  # A nameref must target the bare array name; bound to "name[@]" it makes
  # "${ref[@]}" expand to nothing. Strip the suffix so both forms work.
  local failed_tests_name=${4}
  failed_tests_name=${failed_tests_name%"[@]"}
  # shellcheck disable=SC2034 # nameref is consumed by run_failed_tests_and_report
  local -n failed_tests="${failed_tests_name}"

  log::info "Deploying RBAC to namespace: ${namespace}"

  # Configure namespaces (the external database namespace first)
  configure_namespace "${NAME_SPACE_POSTGRES_DB}"
  configure_namespace "${namespace}"
  configure_external_postgres_db "${namespace}"

  if ! cd "${DIR}"; then
    log::error "Cannot change directory to ${DIR}"
    return 1
  fi

  # Route URL of the instance; used both for templating and for the tests.
  local url="https://${release_name}-developer-hub-${namespace}.${K8S_CLUSTER_ROUTER_BASE}"
  apply_yaml_files "${DIR}" "${namespace}" "${url}"

  log::info "Deploying RBAC image from repository: ${QUAY_REPO}, TAG_NAME: ${TAG_NAME}"

  # Use the same deployment logic as PR jobs (skip orchestrator)
  local merged_pr_rbac_value_file="/tmp/merged-values_showcase-rbac_PR.yaml"
  yq_merge_value_files "merge" "${DIR}/value_files/${HELM_CHART_RBAC_VALUE_FILE_NAME}" "${DIR}/value_files/diff-values_showcase-rbac_PR.yaml" "${merged_pr_rbac_value_file}"
  disable_orchestrator_plugins_in_values "${merged_pr_rbac_value_file}"

  # Keep a copy of the effective values for debugging; best effort only.
  mkdir -p "${ARTIFACT_DIR}/${namespace}"
  cp -a "${merged_pr_rbac_value_file}" "${ARTIFACT_DIR}/${namespace}/" || true

  # get_image_helm_set_params emits several words that must be split into
  # separate helm arguments, hence the unquoted substitution.
  # shellcheck disable=SC2046
  helm upgrade -i "${release_name}" -n "${namespace}" \
    "${HELM_CHART_URL}" --version "${CHART_VERSION}" \
    -f "${merged_pr_rbac_value_file}" \
    --set global.clusterRouterBase="${K8S_CLUSTER_ROUTER_BASE}" \
    $(get_image_helm_set_params)

  # Wait for deployment and run failed tests
  local result=0
  if check_backstage_running "${release_name}" "${namespace}" "${url}"; then
    log::info "RBAC Backstage is running. Running failed tests..."
    # Forward the array by (bare) name so the callee's nameref resolves it.
    run_failed_tests_and_report "${release_name}" "${namespace}" "${playwright_project}" "${url}" failed_tests || result=$?
  else
    log::error "RBAC Backstage deployment failed in ${namespace}"
    result=1
  fi

  # Pod logs are collected on success and on failure alike.
  save_all_pod_logs "${namespace}"
  return "${result}"
}
293+
#######################################
294+
# Run failed tests and save results/artifacts
295+
# Arguments:
296+
# release_name: Helm release name
297+
# namespace: Kubernetes namespace
298+
# playwright_project: Playwright project name
299+
# url: Backstage URL
300+
# failed_tests_ref: Name reference to array of failed test files
301+
#######################################
302+
run_failed_tests_and_report() {
303+
local release_name="${1}"
304+
local namespace="${2}"
305+
local playwright_project="${3}"
306+
local url="${4}"
307+
# shellcheck disable=SC2034 # nameref variable used via indirection
308+
local -n test_files="${5}"
309+
310+
CURRENT_DEPLOYMENT=$((CURRENT_DEPLOYMENT + 1))
311+
save_status_deployment_namespace "${CURRENT_DEPLOYMENT}" "${namespace}"
312+
save_status_failed_to_deploy "${CURRENT_DEPLOYMENT}" false
313+
314+
BASE_URL="${url}"
315+
export BASE_URL
316+
317+
log::info "BASE_URL: ${BASE_URL}"
318+
log::info "Running ${#test_files[@]} previously failed tests for project '${playwright_project}'"
319+
320+
cd "${DIR}/../../e2e-tests"
321+
local e2e_tests_dir
322+
e2e_tests_dir=$(pwd)
323+
324+
yarn install --immutable > /tmp/yarn.install.log.txt 2>&1
325+
local install_status=$?
326+
if [[ ${install_status} -ne 0 ]]; then
327+
log::error "=== YARN INSTALL FAILED ==="
328+
cat /tmp/yarn.install.log.txt
329+
return ${install_status}
330+
fi
331+
log::success "Yarn install completed successfully."
332+
333+
yarn playwright install chromium
334+
335+
Xvfb :99 &
336+
export DISPLAY=:99
337+
338+
# Run only the specific failed test files
339+
(
340+
set -e
341+
log::info "Using PR container image: ${TAG_NAME}"
342+
log::info "Running tests: ${test_files[*]}"
343+
yarn playwright test --project="${playwright_project}" "${test_files[@]}"
344+
) 2>&1 | tee "/tmp/${LOGFILE}"
345+
346+
local result=${PIPESTATUS[0]}
347+
348+
pkill Xvfb || true
349+
350+
# Save artifacts
351+
mkdir -p "${ARTIFACT_DIR}/${namespace}/test-results"
352+
mkdir -p "${ARTIFACT_DIR}/${namespace}/attachments/screenshots"
353+
cp -a "${e2e_tests_dir}/test-results/"* "${ARTIFACT_DIR}/${namespace}/test-results" || true
354+
cp -a "${e2e_tests_dir}/${JUNIT_RESULTS}" "${ARTIFACT_DIR}/${namespace}/${JUNIT_RESULTS}" || true
355+
if [[ "${CI}" == "true" ]]; then
356+
cp "${ARTIFACT_DIR}/${namespace}/${JUNIT_RESULTS}" "${SHARED_DIR}/junit-results-${namespace}.xml" || true
357+
fi
358+
359+
cp -a "${e2e_tests_dir}/screenshots/"* "${ARTIFACT_DIR}/${namespace}/attachments/screenshots/" || true
360+
ansi2html < "/tmp/${LOGFILE}" > "/tmp/${LOGFILE}.html"
361+
cp -a "/tmp/${LOGFILE}.html" "${ARTIFACT_DIR}/${namespace}" || true
362+
cp -a "${e2e_tests_dir}/playwright-report/"* "${ARTIFACT_DIR}/${namespace}" || true
363+
364+
log::info "Rerun tests in namespace '${namespace}' RESULT: ${result}"
365+
366+
if [[ ${result} -ne 0 ]]; then
367+
save_overall_result 1
368+
save_status_test_failed "${CURRENT_DEPLOYMENT}" true
369+
else
370+
save_status_test_failed "${CURRENT_DEPLOYMENT}" false
371+
fi
372+
373+
# Count failures from new JUnit results
374+
if [[ -f "${e2e_tests_dir}/${JUNIT_RESULTS}" ]]; then
375+
local failed_tests_count
376+
failed_tests_count=$(grep -oP 'failures="\K[0-9]+' "${e2e_tests_dir}/${JUNIT_RESULTS}" | head -n 1)
377+
log::info "Number of failed tests after rerun: ${failed_tests_count:-0}"
378+
save_status_number_of_test_failed "${CURRENT_DEPLOYMENT}" "${failed_tests_count:-0}"
379+
else
380+
log::warn "JUnit results file not found: ${e2e_tests_dir}/${JUNIT_RESULTS}"
381+
save_status_number_of_test_failed "${CURRENT_DEPLOYMENT}" "unknown"
382+
fi
383+
384+
return ${result}
385+
}

.ibm/pipelines/openshift-ci-tests.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,13 @@ main() {
133133
log::info "Calling handle_ocp_operator"
134134
handle_ocp_operator
135135
;;
136+
*rerun-failed-tests*)
137+
log::info "Sourcing ocp-rerun-failed-tests.sh"
138+
# shellcheck source=.ibm/pipelines/jobs/ocp-rerun-failed-tests.sh
139+
source "${DIR}/jobs/ocp-rerun-failed-tests.sh"
140+
log::info "Calling handle_ocp_rerun_failed_tests"
141+
handle_ocp_rerun_failed_tests
142+
;;
136143
*pull*ocp*helm*)
137144
log::info "Sourcing ocp-pull.sh"
138145
# shellcheck source=.ibm/pipelines/jobs/ocp-pull.sh

0 commit comments

Comments
 (0)