lightninglabs · ffranr · Jan 5, 2026 · Jan 5, 2026 · Jan 5, 2026 · Jan 5, 2026
diff --git a/docs/release-notes/release-notes-0.8.0.md b/docs/release-notes/release-notes-0.8.0.md
@@ -21,6 +21,10 @@
 
 # Bug Fixes
 
+- [PR#1927](https://github.com/lightninglabs/taproot-assets/pull/1927) Custodian
+  proof retrieval now retries indefinitely and no longer crashes tapd when a
+  universe proof is temporarily unavailable.
+
 # New Features
 
 ## Functional Enhancements

diff --git a/proof/courier.go b/proof/courier.go
@@ -105,6 +105,34 @@ type CourierCfg struct {
 	LocalArchive Archiver
 }
 
+// Validate checks the courier configuration for invalid values.
+func (c *CourierCfg) Validate() error {
+	if c == nil {
+		return nil
+	}
+
+	if c.HashMailCfg != nil {
+		if err := c.HashMailCfg.Validate(); err != nil {
+			return err
+		}
+	}
+	if c.UniverseRpcCfg != nil {
+		if err := c.UniverseRpcCfg.Validate(); err != nil {
+			return err
+		}
+	}
+
+	if c.TransferLog == nil {
+		return fmt.Errorf("transfer log is nil")
+	}
+
+	if c.LocalArchive == nil {
+		return fmt.Errorf("local archive is nil")
+	}
+
+	return nil
+}
+
 // CourierConnStatus is an enum that represents the different states a courier
 // connection can be in.
 type CourierConnStatus int
@@ -188,10 +216,15 @@ type URLDispatch struct {
 }
 
 // NewCourierDispatch creates a new proof courier dispatch.
-func NewCourierDispatch(cfg *CourierCfg) *URLDispatch {
+func NewCourierDispatch(cfg *CourierCfg) (*URLDispatch, error) {
+	if err := cfg.Validate(); err != nil {
+		return nil, fmt.Errorf("validate proof courier dispatch cfg: "+
+			"%w", err)
+	}
+
 	return &URLDispatch{
 		cfg: cfg,
-	}
+	}, nil
 }
 
 // NewCourier instantiates a new courier service handle given a service URL
@@ -587,8 +620,14 @@ type BackoffCfg struct {
 
 	// NumTries is the maximum number of attempts in a backoff run before
 	// giving up (or before a caller-applied reset delay). A value of zero
-	// means use the built-in default limit.
-	NumTries uint32 `long:"numtries" description:"Maximum number of attempts in a backoff run before giving up (or before the reset delay applies, if used). Zero means use the built-in default limit."`
+	// means use the built-in default limit unless UnlimitedTries is set.
+	// If NumTries is non-zero, then UnlimitedTries must be false.
+	NumTries uint32 `long:"numtries" description:"Maximum number of attempts in a backoff run before giving up (or before the reset delay applies, if used). Zero means use the built-in default limit unless unlimitedtries is set."`
+
+	// UnlimitedTries indicates that we should retry indefinitely until the
+	// transfer succeeds or the context is canceled. If true, NumTries must
+	// be zero.
+	UnlimitedTries bool `long:"unlimitedtries" description:"Retry indefinitely instead of stopping after a fixed number of attempts."`
 
 	// InitialBackoff is the initial backoff time we'll use to wait before
 	// retrying to deliver the proof to the receiver.
@@ -599,6 +638,33 @@ type BackoffCfg struct {
 	MaxBackoff time.Duration `long:"maxbackoff" description:"The maximum backoff time to wait before retrying to deliver the proof to the receiver. Valid time units are {s, m, h}."`
 }
 
+// Validate checks the backoff configuration for invalid or conflicting values.
+func (b *BackoffCfg) Validate() error {
+	if b == nil {
+		return nil
+	}
+
+	switch {
+	case b.UnlimitedTries && b.NumTries != 0:
+		return fmt.Errorf("numtries must be 0 when unlimitedtries is " +
+			"set")
+
+	case b.BackoffResetWait < 0:
+		return fmt.Errorf("backoffresetwait must be >= 0")
+
+	case b.InitialBackoff < 0:
+		return fmt.Errorf("initialbackoff must be >= 0")
+
+	case b.MaxBackoff < 0:
+		return fmt.Errorf("maxbackoff must be >= 0")
+
+	case b.MaxBackoff > 0 && b.InitialBackoff > b.MaxBackoff:
+		return fmt.Errorf("initialbackoff must not exceed maxbackoff")
+	}
+
+	return nil
+}
+
 // BackoffHandler is a handler for the backoff procedure.
 type BackoffHandler struct {
 	// cfg contains the backoff configuration parameters.
@@ -704,11 +770,11 @@ func (b *BackoffHandler) Exec(ctx context.Context, proofLocator Locator,
 		errExec error = nil
 	)
 
-	if numTries == 0 {
+	if numTries == 0 && !b.cfg.UnlimitedTries {
 		numTries = DefaultProofTransferNumTries
 	}
 
-	for i := uint32(0); i < numTries; i++ {
+	for i := uint32(0); b.cfg.UnlimitedTries || i < numTries; i++ {
 		// Before attempting to deliver the proof, log that
 		// an attempted delivery is about to occur.
 		err = b.transferLog.LogProofTransferAttempt(
@@ -802,6 +868,19 @@ type HashMailCourierCfg struct {
 	BackoffCfg *BackoffCfg
 }
 
+// Validate checks the hashmail courier configuration for invalid values.
+func (h *HashMailCourierCfg) Validate() error {
+	if h == nil {
+		return nil
+	}
+
+	if h.BackoffCfg != nil {
+		return h.BackoffCfg.Validate()
+	}
+
+	return nil
+}
+
 // HashMailCourier is a hashmail proof courier service handle. It implements the
 // Courier interface.
 type HashMailCourier struct {
@@ -1217,6 +1296,19 @@ type UniverseRpcCourierCfg struct {
 	ServiceRequestTimeout time.Duration `long:"servicerequestimeout" description:"The maximum duration we'll wait for a courier service to handle our outgoing request during a connection attempt, or when delivering or retrieving a proof."`
 }
 
+// Validate checks the universe RPC courier configuration for invalid values.
+func (u *UniverseRpcCourierCfg) Validate() error {
+	if u == nil {
+		return nil
+	}
+
+	if u.BackoffCfg != nil {
+		return u.BackoffCfg.Validate()
+	}
+
+	return nil
+}
+
 // UniverseRpcCourier is a universe RPC proof courier service handle. It
 // implements the Courier interface.
 type UniverseRpcCourier struct {

diff --git a/sample-tapd.conf b/sample-tapd.conf
@@ -231,9 +231,13 @@
 ; hashmailcourier.backoffresetwait=10m
 
 ; Maximum number of attempts in a backoff run before giving up (or before the
-; reset delay applies, if used). Zero means use the built-in default limit.
+; reset delay applies, if used). Zero means use the built-in default limit
+; unless unlimitedtries is set.
 ; hashmailcourier.numtries=2000
 
+; Retry indefinitely instead of stopping after a fixed number of attempts.
+; hashmailcourier.unlimitedtries=false
+
 ; The initial backoff time to wait before retrying to deliver the proof to the
 ; receiver. Valid time units are {s, m, h}.
 ; hashmailcourier.initialbackoff=30s
@@ -253,9 +257,13 @@
 ; universerpccourier.backoffresetwait=10m
 
 ; Maximum number of attempts in a backoff run before giving up (or before the
-; reset delay applies, if used). Zero means use the built-in default limit.
+; reset delay applies, if used). Zero means use the built-in default limit
+; unless unlimitedtries is set.
 ; universerpccourier.numtries=2000
 
+; Retry indefinitely instead of stopping after a fixed number of attempts.
+; universerpccourier.unlimitedtries=false
+
 ; The initial backoff time to wait before retrying to deliver the proof to the
 ; receiver. Valid time units are {s, m, h}.
 ; universerpccourier.initialbackoff=30s

diff --git a/tapcfg/config.go b/tapcfg/config.go
@@ -672,7 +672,7 @@ func applyProofBackoffDefaults(cfg *proof.BackoffCfg) {
 		return
 	}
 
-	if cfg.NumTries == 0 {
+	if cfg.NumTries == 0 && !cfg.UnlimitedTries {
 		cfg.NumTries = proof.DefaultProofTransferNumTries
 	}
 }

diff --git a/tapcfg/server.go b/tapcfg/server.go
@@ -472,12 +472,55 @@ func genServerConfig(cfg *Config, cfgLogger btclog.Logger,
 	// Addresses can have different proof couriers configured, but both
 	// types of couriers that currently exist will receive this config upon
 	// initialization.
-	proofCourierDispatcher := proof.NewCourierDispatch(&proof.CourierCfg{
+	proofCourierDispatcher, err := proof.NewCourierDispatch(
+		&proof.CourierCfg{
+			HashMailCfg:    cfg.HashMailCourier,
+			UniverseRpcCfg: cfg.UniverseRpcCourier,
+			TransferLog:    assetStore,
+			LocalArchive:   proofArchive,
+		},
+	)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create proof courier "+
+			"dispatcher: %w", err)
+	}
+
+	// Formulate a custodian (proof receive) courier dispatcher.
+	custodianCourierCfg := &proof.CourierCfg{
 		HashMailCfg:    cfg.HashMailCourier,
 		UniverseRpcCfg: cfg.UniverseRpcCourier,
 		TransferLog:    assetStore,
 		LocalArchive:   proofArchive,
-	})
+	}
+
+	if cfg.HashMailCourier != nil {
+		hashmailCfg := *cfg.HashMailCourier
+		if hashmailCfg.BackoffCfg != nil {
+			backoffCfg := *hashmailCfg.BackoffCfg
+			backoffCfg.UnlimitedTries = true
+			backoffCfg.NumTries = 0
+			hashmailCfg.BackoffCfg = &backoffCfg
+		}
+		custodianCourierCfg.HashMailCfg = &hashmailCfg
+	}
+
+	if cfg.UniverseRpcCourier != nil {
+		universeCfg := *cfg.UniverseRpcCourier
+		if universeCfg.BackoffCfg != nil {
+			backoffCfg := *universeCfg.BackoffCfg
+			backoffCfg.UnlimitedTries = true
+			backoffCfg.NumTries = 0
+			universeCfg.BackoffCfg = &backoffCfg
+		}
+		custodianCourierCfg.UniverseRpcCfg = &universeCfg
+	}
+	custodianProofCourierDispatcher, err := proof.NewCourierDispatch(
+		custodianCourierCfg,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create custodian proof "+
+			"courier dispatcher: %w", err)
+	}
 
 	multiNotifier := proof.NewMultiArchiveNotifier(assetStore, multiverse)
 
@@ -761,7 +804,7 @@ func genServerConfig(cfg *Config, cfgLogger btclog.Logger,
 			ProofArchive:           proofArchive,
 			ProofNotifier:          multiNotifier,
 			ErrChan:                mainErrChan,
-			ProofCourierDispatcher: proofCourierDispatcher,
+			ProofCourierDispatcher: custodianProofCourierDispatcher,
 			MboxBackoffCfg:         cfg.UniverseRpcCourier.BackoffCfg,
 			ProofRetrievalDelay:    cfg.CustodianProofRetrievalDelay,
 			ProofWatcher:           reOrgWatcher,

diff --git a/tapgarden/custodian.go b/tapgarden/custodian.go
@@ -495,7 +495,14 @@ func (c *Custodian) mainEventLoop() error {
 				),
 			)
 
-			return err
+			log.Errorf("Unable to check proof availability for "+
+				"event (outpoint=%v): %v", event.Outpoint, err)
+
+			if fn.ErrorAs[*fn.CriticalError](err) {
+				return err
+			}
+
+			continue
 		}
 
 		// If we did find a proof, we did import it now and are done.
@@ -552,7 +559,8 @@ func (c *Custodian) mainEventLoop() error {
 	for idx := range walletTxns {
 		err := c.inspectWalletTx(&walletTxns[idx])
 		if err != nil {
-			return err
+			log.Errorf("Unable to inspect wallet transaction %v: "+
+				"%v", walletTxns[idx].Tx.TxHash(), err)
 		}
 	}
 
@@ -610,7 +618,12 @@ func (c *Custodian) mainEventLoop() error {
 				return nil
 			}
 
-			return err
+			if fn.ErrorAs[*fn.CriticalError](err) {
+				return err
+			}
+
+			log.Errorf("Non-critical custodian event loop error: "+
+				"%v", err)
 		}
 	}
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -672,7 +672,7 @@ func applyProofBackoffDefaults(cfg *proof.BackoffCfg) { @@
     		return
     	}
-    	if cfg.NumTries == 0 {
+    	if cfg.NumTries == 0 && !cfg.UnlimitedTries {
     		cfg.NumTries = proof.DefaultProofTransferNumTries
     	}
     }
@@ Expand Down @@