@@ -127,44 +127,6 @@ func (e *ExporterHostSyncer) filterExporterInstances(hostName string, exporterIn
127127 return exporterInstances
128128}
129129
130- // handleBootcUpgrade handles bootc upgrade checking and execution
131- func (e * ExporterHostSyncer ) handleBootcUpgrade (hostSsh ssh.HostManager ) (bool , error ) {
132- // Check if bootc upgrade service is already running
133- statusCmd , _ := hostSsh .RunHostCommand ("systemctl is-active bootc-fetch-apply-updates.service bootc-fetch-apply-updates.timer" )
134- if statusCmd != nil {
135- statuses := strings .Fields (statusCmd .Stdout )
136- if len (statuses ) == 2 &&
137- (statuses [0 ] == "active" || statuses [0 ] == "activating" ||
138- statuses [1 ] == "active" || statuses [1 ] == "activating" ) {
139- fmt .Printf (" ⚠️ Bootc upgrade in progress, skipping exporter instances for this host\n " )
140- return true , nil // skip = true
141- }
142- }
143-
144- // Check booted image
145- bootcStdout , err := hostSsh .RunHostCommand ("[ -f /run/ostree-booted ] && bootc upgrade --check" )
146- if err == nil && bootcStdout != nil && bootcStdout .ExitCode == 0 && bootcStdout .Stdout != "" {
147- if strings .HasPrefix (bootcStdout .Stdout , "No changes" ) {
148- if e .dryRun {
149- fmt .Printf (" ✅ Bootc image is up to date\n " )
150- }
151- } else if e .dryRun {
152- fmt .Printf (" 📄 Would upgrade bootc image\n " )
153- } else {
154- // Trigger bootc upgrade timer now. Assuming it uses manual activation (e.g. OnActiveSec=0, RandomizedDelaySec=1h, RemainAfterElapse=false)
155- _ , err := hostSsh .RunHostCommand ("systemctl restart bootc-fetch-apply-updates.timer" )
156- if err != nil {
157- return false , fmt .Errorf ("error triggering bootc upgrade service: %w" , err )
158- }
159- fmt .Printf (" ✅ Bootc upgrade started, skipping exporter instances for this host\n " )
160- return true , nil // skip = true
161- }
162- } else {
163- fmt .Printf (" ℹ️ Not a bootc managed host\n " )
164- }
165- return false , nil // skip = false
166- }
167-
168130// processExporterInstance processes a single exporter instance
169131func (e * ExporterHostSyncer ) processExporterInstance (exporterInstance * api.ExporterInstance , hostSsh ssh.HostManager ) error {
170132 if isDead , deadAnnotation := isExporterInstanceDead (exporterInstance ); isDead {
@@ -222,8 +184,9 @@ func (e *ExporterHostSyncer) calculateBackoffDelay(attempts int) time.Duration {
222184 return delay
223185}
224186
225- // processExporterInstances processes exporter instances and adds failures to global retry queue
226- func (e * ExporterHostSyncer ) processExporterInstances (exporterInstances []* api.ExporterInstance , hostSsh ssh.HostManager , hostName string , retryQueue * []RetryItem ) {
187+ // processExporterInstancesAndBootc processes exporter instances, then runs the host's bootc upgrade handling; failures from either step are added to the global retry queue
188+ func (e * ExporterHostSyncer ) processExporterInstancesAndBootc (exporterInstances []* api.ExporterInstance , hostSsh ssh.HostManager , hostName string , retryQueue * []RetryItem ) {
189+
227190 for _ , exporterInstance := range exporterInstances {
228191 if err := e .processExporterInstance (exporterInstance , hostSsh ); err != nil {
229192 fmt .Printf (" ❌ Failed to process %s: %v\n " , exporterInstance .Name , err )
@@ -237,6 +200,19 @@ func (e *ExporterHostSyncer) processExporterInstances(exporterInstances []*api.E
237200 })
238201 }
239202 }
203+
204+ if err := hostSsh .HandleBootcUpgrade (e .dryRun ); err != nil {
205+ // Log the bootc upgrade failure and queue a host-level retry item (nil ExporterInstance)
206+ fmt .Printf (" ⚠️ Bootc upgrade error: %v\n " , err )
207+ * retryQueue = append (* retryQueue , RetryItem {
208+ ExporterInstance : nil ,
209+ HostSSH : hostSsh ,
210+ HostName : hostName ,
211+ Attempts : 1 ,
212+ LastError : err ,
213+ LastAttemptTime : time .Now (),
214+ })
215+ }
240216}
241217
242218// processGlobalRetryQueue processes the global retry queue with exponential backoff
@@ -272,18 +248,28 @@ func (e *ExporterHostSyncer) processGlobalRetryQueue(retryQueue []RetryItem) err
272248
273249 // Second pass: retry items that are ready
274250 for _ , retryItem := range itemsToRetry {
275- fmt .Printf ("🔄 Retrying %s on %s (attempt %d/%d)...\n " ,
276- retryItem .ExporterInstance .Name , retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
277-
278- if err := e .processExporterInstance (retryItem .ExporterInstance , retryItem .HostSSH ); err != nil {
279- // Still failed, increment attempts and add to next retry queue
280- retryItem .Attempts ++
281- retryItem .LastError = err
282- retryItem .LastAttemptTime = time .Now ()
283- nextRetryQueue = append (nextRetryQueue , retryItem )
284- fmt .Printf ("❌ Retry failed for %s on %s: %v\n " , retryItem .ExporterInstance .Name , retryItem .HostName , err )
251+ if retryItem .ExporterInstance == nil {
252+ fmt .Printf ("🔄 Retrying bootc upgrade on %s (attempt %d/%d)...\n " ,
253+ retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
254+ if err := retryItem .HostSSH .HandleBootcUpgrade (e .dryRun ); err != nil {
255+ fmt .Printf ("❌ Retry failed for bootc upgrade on %s: %v\n " , retryItem .HostName , err )
256+ } else {
257+ fmt .Printf ("✅ Retry succeeded for bootc upgrade on %s\n " , retryItem .HostName )
258+ }
285259 } else {
286- fmt .Printf ("✅ Retry succeeded for %s on %s\n " , retryItem .ExporterInstance .Name , retryItem .HostName )
260+ fmt .Printf ("🔄 Retrying instance %s on %s (attempt %d/%d)...\n " ,
261+ retryItem .ExporterInstance .Name , retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
262+
263+ if err := e .processExporterInstance (retryItem .ExporterInstance , retryItem .HostSSH ); err != nil {
264+ // Still failed, increment attempts and add to next retry queue
265+ retryItem .Attempts ++
266+ retryItem .LastError = err
267+ retryItem .LastAttemptTime = time .Now ()
268+ nextRetryQueue = append (nextRetryQueue , retryItem )
269+ fmt .Printf ("❌ Retry failed for %s on %s: %v\n " , retryItem .ExporterInstance .Name , retryItem .HostName , err )
270+ } else {
271+ fmt .Printf ("✅ Retry succeeded for %s on %s\n " , retryItem .ExporterInstance .Name , retryItem .HostName )
272+ }
287273 }
288274 }
289275
@@ -359,14 +345,8 @@ func (e *ExporterHostSyncer) SyncExporterHosts() error {
359345 fmt .Printf (" ✅ Connection: %s\n " , status )
360346 }
361347
362- if skip , err := e .handleBootcUpgrade (hostSsh ); err != nil {
363- return err
364- } else if skip {
365- continue
366- }
367-
368348 // Process each exporter instance and add failures to global retry queue
369- e .processExporterInstances (exporterInstances , hostSsh , host .Name , & retryQueue )
349+ e .processExporterInstancesAndBootc (exporterInstances , hostSsh , host .Name , & retryQueue )
370350 }
371351
372352 // Second pass: retry all failed instances globally
0 commit comments