@@ -209,7 +209,12 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
209
209
210
210
spin_lock_irqsave (& ndlp -> lock , iflags );
211
211
ndlp -> nlp_flag |= NLP_IN_DEV_LOSS ;
212
- ndlp -> nlp_flag &= ~NLP_NPR_2B_DISC ;
212
+
213
+ /* If there is a PLOGI in progress, and we are in a
214
+ * NLP_NPR_2B_DISC state, don't turn off the flag.
215
+ */
216
+ if (ndlp -> nlp_state != NLP_STE_PLOGI_ISSUE )
217
+ ndlp -> nlp_flag &= ~NLP_NPR_2B_DISC ;
213
218
214
219
/*
215
220
* The backend does not expect any more calls associated with this
@@ -340,6 +345,37 @@ static void lpfc_check_inactive_vmid(struct lpfc_hba *phba)
340
345
lpfc_destroy_vport_work_array (phba , vports );
341
346
}
342
347
348
+ /**
349
+ * lpfc_check_nlp_post_devloss - Check to restore ndlp refcnt after devloss
350
+ * @vport: Pointer to vport object.
351
+ * @ndlp: Pointer to remote node object.
352
+ *
353
+ * If NLP_IN_RECOV_POST_DEV_LOSS flag was set due to outstanding recovery of
354
+ * node during dev_loss_tmo processing, then this function restores the nlp_put
355
+ * kref decrement from lpfc_dev_loss_tmo_handler.
356
+ **/
357
+ void
358
+ lpfc_check_nlp_post_devloss (struct lpfc_vport * vport ,
359
+ struct lpfc_nodelist * ndlp )
360
+ {
361
+ unsigned long iflags ;
362
+
363
+ spin_lock_irqsave (& ndlp -> lock , iflags );
364
+ if (ndlp -> save_flags & NLP_IN_RECOV_POST_DEV_LOSS ) {
365
+ ndlp -> save_flags &= ~NLP_IN_RECOV_POST_DEV_LOSS ;
366
+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
367
+ lpfc_nlp_get (ndlp );
368
+ lpfc_printf_vlog (vport , KERN_INFO , LOG_DISCOVERY | LOG_NODE ,
369
+ "8438 Devloss timeout reversed on DID x%x "
370
+ "refcnt %d ndlp %p flag x%x "
371
+ "port_state = x%x\n" ,
372
+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ), ndlp ,
373
+ ndlp -> nlp_flag , vport -> port_state );
374
+ spin_lock_irqsave (& ndlp -> lock , iflags );
375
+ }
376
+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
377
+ }
378
+
343
379
/**
344
380
* lpfc_dev_loss_tmo_handler - Remote node devloss timeout handler
345
381
* @ndlp: Pointer to remote node object.
@@ -358,6 +394,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
358
394
uint8_t * name ;
359
395
int warn_on = 0 ;
360
396
int fcf_inuse = 0 ;
397
+ bool recovering = false;
398
+ struct fc_vport * fc_vport = NULL ;
361
399
unsigned long iflags ;
362
400
363
401
vport = ndlp -> vport ;
@@ -394,6 +432,64 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
394
432
395
433
/* Fabric nodes are done. */
396
434
if (ndlp -> nlp_type & NLP_FABRIC ) {
435
+ spin_lock_irqsave (& ndlp -> lock , iflags );
436
+ /* In massive vport configuration settings, it's possible
437
+ * dev_loss_tmo fired during node recovery. So, check if
438
+ * fabric nodes are in discovery states outstanding.
439
+ */
440
+ switch (ndlp -> nlp_DID ) {
441
+ case Fabric_DID :
442
+ fc_vport = vport -> fc_vport ;
443
+ if (fc_vport &&
444
+ fc_vport -> vport_state == FC_VPORT_INITIALIZING )
445
+ recovering = true;
446
+ break ;
447
+ case Fabric_Cntl_DID :
448
+ if (ndlp -> nlp_flag & NLP_REG_LOGIN_SEND )
449
+ recovering = true;
450
+ break ;
451
+ case FDMI_DID :
452
+ fallthrough ;
453
+ case NameServer_DID :
454
+ if (ndlp -> nlp_state >= NLP_STE_PLOGI_ISSUE &&
455
+ ndlp -> nlp_state <= NLP_STE_REG_LOGIN_ISSUE )
456
+ recovering = true;
457
+ break ;
458
+ }
459
+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
460
+
461
+ /* Mark an NLP_IN_RECOV_POST_DEV_LOSS flag to know if reversing
462
+ * the following lpfc_nlp_put is necessary after fabric node is
463
+ * recovered.
464
+ */
465
+ if (recovering ) {
466
+ lpfc_printf_vlog (vport , KERN_INFO ,
467
+ LOG_DISCOVERY | LOG_NODE ,
468
+ "8436 Devloss timeout marked on "
469
+ "DID x%x refcnt %d ndlp %p "
470
+ "flag x%x port_state = x%x\n" ,
471
+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ),
472
+ ndlp , ndlp -> nlp_flag ,
473
+ vport -> port_state );
474
+ spin_lock_irqsave (& ndlp -> lock , iflags );
475
+ ndlp -> save_flags |= NLP_IN_RECOV_POST_DEV_LOSS ;
476
+ spin_unlock_irqrestore (& ndlp -> lock , iflags );
477
+ } else if (ndlp -> nlp_state == NLP_STE_UNMAPPED_NODE ) {
478
+ /* Fabric node fully recovered before this dev_loss_tmo
479
+ * queue work is processed. Thus, ignore the
480
+ * dev_loss_tmo event.
481
+ */
482
+ lpfc_printf_vlog (vport , KERN_INFO ,
483
+ LOG_DISCOVERY | LOG_NODE ,
484
+ "8437 Devloss timeout ignored on "
485
+ "DID x%x refcnt %d ndlp %p "
486
+ "flag x%x port_state = x%x\n" ,
487
+ ndlp -> nlp_DID , kref_read (& ndlp -> kref ),
488
+ ndlp , ndlp -> nlp_flag ,
489
+ vport -> port_state );
490
+ return fcf_inuse ;
491
+ }
492
+
397
493
lpfc_nlp_put (ndlp );
398
494
return fcf_inuse ;
399
495
}
@@ -423,6 +519,14 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
423
519
ndlp -> nlp_state , ndlp -> nlp_rpi );
424
520
}
425
521
522
+ /* If we are devloss, but we are in the process of rediscovering the
523
+ * ndlp, don't issue a NLP_EVT_DEVICE_RM event.
524
+ */
525
+ if (ndlp -> nlp_state >= NLP_STE_PLOGI_ISSUE &&
526
+ ndlp -> nlp_state <= NLP_STE_PRLI_ISSUE ) {
527
+ return fcf_inuse ;
528
+ }
529
+
426
530
if (!(ndlp -> fc4_xpt_flags & NVME_XPT_REGD ))
427
531
lpfc_disc_state_machine (vport , ndlp , NULL , NLP_EVT_DEVICE_RM );
428
532
@@ -4363,6 +4467,8 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
4363
4467
goto out ;
4364
4468
}
4365
4469
4470
+ lpfc_check_nlp_post_devloss (vport , ndlp );
4471
+
4366
4472
if (phba -> sli_rev < LPFC_SLI_REV4 )
4367
4473
ndlp -> nlp_rpi = mb -> un .varWords [0 ];
4368
4474
@@ -4540,9 +4646,10 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
4540
4646
void
4541
4647
lpfc_nlp_reg_node (struct lpfc_vport * vport , struct lpfc_nodelist * ndlp )
4542
4648
{
4543
-
4544
4649
unsigned long iflags ;
4545
4650
4651
+ lpfc_check_nlp_post_devloss (vport , ndlp );
4652
+
4546
4653
spin_lock_irqsave (& ndlp -> lock , iflags );
4547
4654
if (ndlp -> fc4_xpt_flags & NLP_XPT_REGD ) {
4548
4655
/* Already registered with backend, trigger rescan */
0 commit comments