-
Notifications
You must be signed in to change notification settings - Fork 13
Open
Milestone
Description
The current logic is overly complicated, and burried in gpbftRunner deep enough that unit testing the raw heuristic itself in terms of conformity to basic design goals is cumbersome if not impossible.
The heuristic itself uses a number of parameters from manifest, and it's not entirely clear if we really need so many knobs to tune the instance start.
- Refactor the heuristic into a stateless function that takes clear set of parameters.
- Add unit tests to assert what the design goals are/should be.
- Then simplify the logic and distil to 1 but no more than 2 parameters.
Lines 389 to 460 in f77d509
| func (h *gpbftRunner) computeNextInstanceStart(cert *certs.FinalityCertificate) (_nextStart time.Time) { | |
| ecDelay := time.Duration(h.manifest.EC.DelayMultiplier * float64(h.manifest.EC.Period)) | |
| head, err := h.ec.GetHead(h.runningCtx) | |
| if err != nil { | |
| // this should not happen | |
| log.Errorf("ec.GetHead returned error: %+v", err) | |
| return h.clock.Now().Add(ecDelay) | |
| } | |
| // the head of the cert becomes the new base | |
| baseTipSet := cert.ECChain.Head() | |
| // we are not trying to fetch the new base tipset from EC as it might not be available | |
| // instead we compute the relative time from the EC.Head | |
| baseTimestamp := computeTipsetTimestampAtEpoch(head, baseTipSet.Epoch, h.manifest.EC.Period) | |
| // Try to align instances while catching up, if configured. | |
| if h.manifest.CatchUpAlignment > 0 { | |
| defer func() { | |
| now := h.clock.Now() | |
| // If we were supposed to start this instance more than one GPBFT round ago, assume | |
| // we're behind and try to align our start times. This helps keep nodes | |
| // in-sync when bootstrapping and catching up. | |
| if _nextStart.Before(now.Add(-h.manifest.CatchUpAlignment)) { | |
| delay := now.Sub(baseTimestamp) | |
| if offset := delay % h.manifest.CatchUpAlignment; offset > 0 { | |
| delay += h.manifest.CatchUpAlignment - offset | |
| } | |
| _nextStart = baseTimestamp.Add(delay) | |
| } | |
| }() | |
| } | |
| lookbackDelay := h.manifest.EC.Period * time.Duration(h.manifest.EC.HeadLookback) | |
| if cert.ECChain.HasSuffix() { | |
| // we decided on something new, the tipset that got finalized can at minimum be 30-60s old. | |
| return baseTimestamp.Add(ecDelay).Add(lookbackDelay) | |
| } | |
| if cert.GPBFTInstance == h.manifest.InitialInstance { | |
| // if we are at initial instance, there is no history to look at | |
| return baseTimestamp.Add(ecDelay).Add(lookbackDelay) | |
| } | |
| backoffTable := h.manifest.EC.BaseDecisionBackoffTable | |
| attempts := 0 | |
| backoffMultipler := 2.0 // baseline 1 + 1 to account for the one ECDelay after which we got the base decistion | |
| for instance := cert.GPBFTInstance - 1; instance > h.manifest.InitialInstance; instance-- { | |
| cert, err := h.certStore.Get(h.runningCtx, instance) | |
| if err != nil { | |
| log.Errorf("error while getting instance %d from certstore: %+v", instance, err) | |
| break | |
| } | |
| if cert.ECChain.HasSuffix() { | |
| break | |
| } | |
| attempts += 1 | |
| if attempts < len(backoffTable) { | |
| backoffMultipler += backoffTable[attempts] | |
| } else { | |
| // if we are beyond backoffTable, reuse the last element | |
| backoffMultipler += backoffTable[len(backoffTable)-1] | |
| } | |
| } | |
| backoff := time.Duration(float64(ecDelay) * backoffMultipler) | |
| log.Debugf("backing off for: %v", backoff) | |
| return baseTimestamp.Add(backoff).Add(lookbackDelay) | |
| } |
Metadata
Metadata
Assignees
Labels
No labels
Type
Projects
Status
Todo