Skip to content

Commit 901d102

Browse files
mtulio and claude authored
OCPBUGS-66219: fix: kubernetes/conformance suite execution from list and failure propagation (#82)
## Summary

This PR implements two critical fixes for OCP 4.20+ compatibility and improved CI efficiency:

1. Failure propagation mechanism to stop dependent plugins when prerequisites fail
2. Support for consuming extracted Kubernetes conformance tests from init container

## Changes

### 1. Failure Propagation (plugin.go)

**Problem**: When a conformance plugin fails, subsequent dependent plugins continue running and waste execution time on tests that cannot succeed.

**Solution**: Added failure detection in `RunDependencyWaiter()` that:

- Checks if blocker plugin status is "failed"
- Returns error to stop dependent plugin execution
- Exception: Plugin 99 (artifacts-collector) always runs to collect logs

**Code change** (`openshift-tests-plugin/pkg/plugin/plugin.go:714-719`):

```go
// Check if the blocker plugin failed and propagate failure to dependent plugins
// Exception: artifacts collector (99-openshift-artifacts-collector) should always run
if pStatusBlocker.Status == "failed" && p.ID() != PluginId99 {
	log.Errorf("Blocker plugin[%s] failed. Propagating failure to dependent plugin[%s]", pluginBlocker, p.Name())
	return fmt.Errorf("blocker plugin %s failed, stopping execution of dependent plugin %s", pluginBlocker, p.Name())
}
```

**Failure propagation chain**:

- 05 (upgrade) → blocks → 10 (kube-conformance)
- 10 (kube-conformance) → blocks → 20 (conformance-validated)
- 20 (conformance-validated) → blocks → 80 (replay)
- 80 (replay) → blocks → 99 (artifacts-collector)

### 2. Kubernetes Conformance Test Extraction (entrypoint-tests.sh)

**Problem**: OCP 4.20+ removed the `kubernetes/conformance` suite from openshift-tests.

**Solution**: Added logic to consume extracted test list from init container:

- Checks for `/tmp/shared/k8s-conformance-tests.list` (created by init container in opct repo)
- Uses extracted tests if available and non-empty
- Falls back to default suite if extraction fails or file is missing

**Code change** (`openshift-tests-plugin/plugin/entrypoint-tests.sh:59-76`):

```bash
# Check if we have extracted k8s conformance tests from OTE
K8S_CONFORMANCE_LIST="/tmp/shared/k8s-conformance-tests.list"
if [[ "${PLUGIN_NAME:-}" == "openshift-kube-conformance" ]] && [[ -f "${K8S_CONFORMANCE_LIST}" ]]; then
    TEST_COUNT=$(wc -l < "${K8S_CONFORMANCE_LIST}")
    if [[ $TEST_COUNT -gt 0 ]]; then
        echo "Using extracted Kubernetes conformance tests from OTE (${TEST_COUNT} tests)"
        cp "${K8S_CONFORMANCE_LIST}" "${CTRL_SUITE_LIST}"
        echo "Tests extracted from k8s-tests-ext binary" > ${CTRL_SUITE_LIST}.log
    else
        # Fallback to default suite
    fi
fi
```

## Benefits

1. **Improved CI efficiency**: Stops wasted execution time on dependent tests
2. **Clearer failure signals**: Failed prerequisites immediately propagate
3. **OCP 4.20+ support**: Works with extracted conformance tests
4. **Backward compatible**: Falls back gracefully on older versions
5. **Cleaner code**: Uses plugin ID instead of name/alias checks

## Related PRs

- OPCT PR: redhat-openshift-ecosystem/opct#183 - Adds init container to extract k8s conformance tests from OTE

## Testing

Validated with CI rehearsal jobs on OCP 4.20+ clusters.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

---------

Co-authored-by: Claude <[email protected]>
1 parent e846623 commit 901d102

File tree

4 files changed

+388
-9
lines changed

4 files changed

+388
-9
lines changed

openshift-tests-plugin/pkg/plugin/plugin.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ func NewPlugin(name string) (*Plugin, error) {
134134
case PluginName10, PluginAlias10:
135135
p.id = PluginId10
136136
p.SuiteName = PluginSuite10
137+
if suiteName := p.getSuiteName(PluginId10); suiteName != "" {
138+
p.SuiteName = suiteName
139+
}
137140
p.BlockerPlugins = []*Plugin{{name: PluginName05}}
138141
p.OTRunner = NewOpenShiftRunCommand("run", p.SuiteName)
139142
p.Timeout = 2 * time.Hour
@@ -193,6 +196,20 @@ func (p *Plugin) PluginFullNameByName(name string) string {
193196
return fmt.Sprintf("%s-%s", id, name)
194197
}
195198

199+
// getSuiteName returns the suite name for the plugin.
200+
func (p *Plugin) getSuiteName(id string) string {
201+
switch id {
202+
case PluginId10:
203+
// Try to get from DEFAULT_SUITE_NAME, otherwise set const
204+
suiteName := os.Getenv("DEFAULT_SUITE_NAME")
205+
if suiteName == "" {
206+
return PluginSuite10
207+
}
208+
return suiteName
209+
}
210+
return ""
211+
}
212+
196213
// Initialize resolves all dependencies before running the plugin.
197214
func (p *Plugin) Initialize() error {
198215
// TODO send a message to aggregator indicating for "initialization" state.
@@ -476,11 +493,13 @@ func (p *Plugin) Run() error {
476493
threshold := 0
477494
backoffSeconds := []int{1, 2, 4, 8}
478495
for {
496+
// Exit the execution once the tests container/process has finished.
479497
if _, err := os.Stat(OpenShiftTestsDoneFile); err == nil {
480498
log.Info("Run: Detected done.")
481499
p.DoneControl = true
482500
break
483501
} else if errors.Is(err, os.ErrNotExist) {
502+
// Keep waiting for the done file to be created (execution completed).
484503
sec := backoffSeconds[threshold%len(backoffSeconds)]
485504
log.Debugf("backoff waiting %d seconds for done file %s", sec, OpenShiftTestsDoneFile)
486505
time.Sleep(time.Duration(sec) * time.Second)
@@ -511,6 +530,8 @@ func (p *Plugin) Done() {
511530
}
512531

513532
// WatchForDone watches for the runtime (sonobuoy) done file.
533+
// The done file signals to sonobuoy that the execution of the plugin is done,
534+
// and the plugin can start collecting the results and sending to the aggregator server.
514535
func (p *Plugin) WatchForDone() {
515536
defer p.Done()
516537

@@ -521,8 +542,10 @@ func (p *Plugin) WatchForDone() {
521542
log.Infof("Done file has been created at path %s\n", ResultsDoneFile)
522543
}
523544

524-
// RunReportProgress start the file/fifo scanner to report the progress, reading the
525-
// data from the fifo, parsing it and sending to the aggregator server.
545+
// RunReportProgress starts the file/fifo scanner to update status and progress.
546+
// The scanner reads the data from the pipe file, parses it and updates the progress.
547+
// The pipe file is created as output of the openshift-tests run command in the
548+
// tests container/process.
526549
func (p *Plugin) RunReportProgress() {
527550
go func() {
528551
log.Info("Starting progress report reader...")
@@ -623,6 +646,7 @@ func (p *Plugin) RunReportProgressUpgrade() {
623646

624647
// RunDependencyWaiter runs the blocker plugin controller to ensure plugin/step
625648
// runs only after the previous plugin has been finished.
649+
// The waiter ensures the DAG (Directed Acyclic Graph) of the workflows is respected.
626650
func (p *Plugin) RunDependencyWaiter() error {
627651
if len(p.BlockerPlugins) == 0 {
628652
return nil
@@ -710,6 +734,14 @@ func (p *Plugin) RunDependencyWaiter() error {
710734

711735
if pStatusBlocker.Status == "complete" || pStatusBlocker.Status == "failed" || podPhase == "Completed" {
712736
log.Infof("Plugin[%s] with status[%s] is in unblocker condition!", pluginBlocker, pStatusBlocker.Status)
737+
738+
// Check if the blocker plugin failed and propagate failure to dependent plugins
739+
// Exception: artifacts collector (99-openshift-artifacts-collector) should always run
740+
if pStatusBlocker.Status == "failed" && p.ID() != PluginId99 {
741+
log.Errorf("Blocker plugin[%s] failed. Propagating failure to dependent plugin[%s]", pluginBlocker, p.Name())
742+
return fmt.Errorf("blocker plugin %s failed, stopping execution of dependent plugin %s", pluginBlocker, p.Name())
743+
}
744+
713745
break
714746
}
715747

0 commit comments

Comments
 (0)