From 2de12d8d14354d5dad7501564ee20f3baa63906e Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 21:26:29 +0200 Subject: [PATCH 01/62] feat(shard distributor): add shard key helpers and metrics state Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/etcdkeys/etcdkeys.go | 25 +++++++++++++++++++ service/sharddistributor/store/state.go | 7 ++++++ 2 files changed, 32 insertions(+) diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go index aad2638e35e..998e82af078 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go @@ -11,6 +11,7 @@ const ( ExecutorReportedShardsKey = "reported_shards" ExecutorAssignedStateKey = "assigned_state" ShardAssignedKey = "assigned" + ShardMetricsKey = "metrics" ) var validKeyTypes = []string{ @@ -57,3 +58,27 @@ func ParseExecutorKey(prefix string, namespace, key string) (executorID, keyType } return parts[0], parts[1], nil } + +func BuildShardPrefix(prefix string, namespace string) string { + return fmt.Sprintf("%s/shards/", BuildNamespacePrefix(prefix, namespace)) +} + +func BuildShardKey(prefix string, namespace, shardID, keyType string) (string, error) { + if keyType != ShardAssignedKey && keyType != ShardMetricsKey { + return "", fmt.Errorf("invalid shard key type: %s", keyType) + } + return fmt.Sprintf("%s%s/%s", BuildShardPrefix(prefix, namespace), shardID, keyType), nil +} + +func ParseShardKey(prefix string, namespace, key string) (shardID, keyType string, err error) { + prefix = BuildShardPrefix(prefix, namespace) + if !strings.HasPrefix(key, prefix) { + return "", "", fmt.Errorf("key '%s' does not have expected prefix '%s'", key, prefix) + } + remainder := strings.TrimPrefix(key, prefix) + parts := strings.Split(remainder, "/") + if len(parts) != 2 { + return "", "", fmt.Errorf("unexpected shard key format: %s", key) + } + return parts[0], parts[1], nil +} diff --git a/service/sharddistributor/store/state.go b/service/sharddistributor/store/state.go index 2020df0988d..455f48e1e2a 100644 --- a/service/sharddistributor/store/state.go +++ b/service/sharddistributor/store/state.go @@ -18,6 +18,7 @@ type AssignedState struct { type NamespaceState struct { Executors map[string]HeartbeatState + ShardMetrics map[string]ShardMetrics ShardAssignments map[string]AssignedState GlobalRevision int64 } @@ -25,3 +26,9 @@ type NamespaceState struct { type ShardState struct { ExecutorID string } + +type ShardMetrics struct { + SmoothedLoad float64 `json:"smoothed_load"` // EWMA of shard load that persists across executor changes + LastUpdateTime int64 `json:"last_update_time"` // heartbeat timestamp that last updated the EWMA + LastMoveTime int64 `json:"last_move_time"` // timestamp for the latest reassignment, used for cooldowns +} From 5d95067e90c5b592bbb0ed3d255fac0b99e07146 Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 22:10:17 +0200 Subject: [PATCH 02/62] feat(shard distributor): persist shard metrics in etcd store Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 122 +++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index e8fc6f50d73..9f51b163256 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -201,6 +201,7 @@ func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*store.NamespaceState, error) { heartbeatStates := make(map[string]store.HeartbeatState) assignedStates := make(map[string]store.AssignedState) + shardMetrics := make(map[string]store.ShardMetrics) executorPrefix := etcdkeys.BuildExecutorPrefix(s.prefix, namespace) resp, err := s.client.Get(ctx, executorPrefix, clientv3.WithPrefix()) @@ -242,10 +243,38 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st assignedStates[executorID] = assigned } + // Fetch shard-level metrics stored under shard namespace keys. + shardPrefix := etcdkeys.BuildShardPrefix(s.prefix, namespace) + shardResp, err := s.client.Get(ctx, shardPrefix, clientv3.WithPrefix()) + if err != nil { + return nil, fmt.Errorf("get shard data: %w", err) + } + for _, kv := range shardResp.Kvs { + shardID, shardKeyType, err := etcdkeys.ParseShardKey(s.prefix, namespace, string(kv.Key)) + if err != nil { + continue + } + if shardKeyType != etcdkeys.ShardMetricsKey { + continue + } + var shardMetric store.ShardMetrics + if err := json.Unmarshal(kv.Value, &shardMetric); err != nil { + continue + } + shardMetrics[shardID] = shardMetric + } + + // Compute a global revision that reflects both executor and shard state. + globalRevision := resp.Header.Revision + if shardResp.Header.Revision > globalRevision { + globalRevision = shardResp.Header.Revision + } + return &store.NamespaceState{ Executors: heartbeatStates, + ShardMetrics: shardMetrics, ShardAssignments: assignedStates, - GlobalRevision: resp.Header.Revision, + GlobalRevision: globalRevision, }, nil } @@ -294,6 +323,67 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, var ops []clientv3.Op var comparisons []clientv3.Cmp + // Compute shard moves to update last_move_time metrics when ownership changes. + // Read current assignments for the namespace and compare with the new state. + // Any concurrent change will be caught by the revision comparisons below. + currentAssignments := make(map[string]string) // shardID -> executorID + executorPrefix := etcdkeys.BuildExecutorPrefix(s.prefix, namespace) + resp, err := s.client.Get(ctx, executorPrefix, clientv3.WithPrefix()) + if err != nil { + return fmt.Errorf("get current assignments: %w", err) + } + for _, kv := range resp.Kvs { + executorID, keyType, keyErr := etcdkeys.ParseExecutorKey(s.prefix, namespace, string(kv.Key)) + if keyErr != nil || keyType != etcdkeys.ExecutorAssignedStateKey { + continue + } + var state store.AssignedState + if err := json.Unmarshal(kv.Value, &state); err != nil { + return fmt.Errorf("unmarshal current assigned state: %w", err) + } + for shardID := range state.AssignedShards { + currentAssignments[shardID] = executorID + } + } + + // Build new owner map and detect moved shards. + newAssignments := make(map[string]string) + for executorID, state := range request.NewState.ShardAssignments { + for shardID := range state.AssignedShards { + newAssignments[shardID] = executorID + } + } + now := time.Now().Unix() + for shardID, newOwner := range newAssignments { + if oldOwner, ok := currentAssignments[shardID]; ok && oldOwner == newOwner { + continue + } + // Owner changed or new shard: update metrics.last_move_time while preserving existing metrics when present. + shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + if err != nil { + return fmt.Errorf("build shard metrics key: %w", err) + } + var shardMetrics store.ShardMetrics + metricsResp, err := s.client.Get(ctx, shardMetricsKey) + if err != nil { + return fmt.Errorf("get shard metrics: %w", err) + } + if len(metricsResp.Kvs) > 0 { + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { + return fmt.Errorf("unmarshal shard metrics: %w", err) + } + } else { + shardMetrics.SmoothedLoad = 0 + shardMetrics.LastUpdateTime = now + } + shardMetrics.LastMoveTime = now + payload, err := json.Marshal(shardMetrics) + if err != nil { + return fmt.Errorf("marshal shard metrics: %w", err) + } + ops = append(ops, clientv3.OpPut(shardMetricsKey, string(payload))) + } + // 1. Prepare operations to update executor states and shard ownership, // and comparisons to check for concurrent modifications. for executorID, state := range request.NewState.ShardAssignments { @@ -369,6 +459,10 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, if err != nil { return fmt.Errorf("build executor status key: %w", err) } + shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + if err != nil { + return fmt.Errorf("build shard metrics key: %w", err) + } // Use a read-modify-write loop to handle concurrent updates safely. for { @@ -379,6 +473,7 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, } var state store.AssignedState + var shardMetrics store.ShardMetrics modRevision := int64(0) // A revision of 0 means the key doesn't exist yet. if len(resp.Kvs) > 0 { @@ -393,6 +488,21 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, state.AssignedShards = make(map[string]*types.ShardAssignment) } + metricsResp, err := s.client.Get(ctx, shardMetricsKey) + if err != nil { + return fmt.Errorf("get shard metrics: %w", err) + } + if len(metricsResp.Kvs) > 0 { + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { + return fmt.Errorf("unmarshal shard metrics: %w", err) + } + } else { + now := time.Now().Unix() + shardMetrics.SmoothedLoad = 0 + shardMetrics.LastUpdateTime = now + shardMetrics.LastMoveTime = now + } + // 2. Modify the state in memory, adding the new shard if it's not already there. if _, alreadyAssigned := state.AssignedShards[shardID]; !alreadyAssigned { state.AssignedShards[shardID] = &types.ShardAssignment{Status: types.AssignmentStatusREADY} @@ -403,6 +513,11 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, return fmt.Errorf("marshal new assigned state: %w", err) } + newMetricsValue, err := json.Marshal(shardMetrics) + if err != nil { + return fmt.Errorf("marshal new shard metrics: %w", err) + } + var comparisons []clientv3.Cmp // 3. Prepare and commit the transaction with three atomic checks. @@ -428,7 +543,10 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, txnResp, err := s.client.Txn(ctx). If(comparisons...). - Then(clientv3.OpPut(assignedState, string(newStateValue))). + Then( + clientv3.OpPut(assignedState, string(newStateValue)), + clientv3.OpPut(shardMetricsKey, string(newMetricsValue)), + ). Commit() if err != nil { From 6e5753611c4abdc2d27c19067f164b784048e6fb Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 22:40:33 +0200 Subject: [PATCH 03/62] fix(shard distributor): update LastMoveTime in the case where a shard is being reassigned in AssignShard Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../sharddistributor/store/etcd/executorstore/etcdstore.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 9f51b163256..77f39746c09 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -492,12 +492,17 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, if err != nil { return fmt.Errorf("get shard metrics: %w", err) } + now := time.Now().Unix() if len(metricsResp.Kvs) > 0 { if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { return fmt.Errorf("unmarshal shard metrics: %w", err) } + // Metrics already exist, update the last move time. + // This can happen if the shard was previously assigned to an executor, and a lookup happens after the executor is deleted, + // AssignShard is then called to assign the shard to a new executor. + shardMetrics.LastMoveTime = now } else { - now := time.Now().Unix() + // Metrics don't exist, initialize them. shardMetrics.SmoothedLoad = 0 shardMetrics.LastUpdateTime = now shardMetrics.LastMoveTime = now From 595d32094b230901b36ce2fbb209edd046d7689f Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 22:41:11 +0200 Subject: [PATCH 04/62] test(shard distributor): add tests for shard metrics Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../etcd/executorstore/etcdstore_test.go | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index ff3dc815e36..3ff69ef60cb 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -505,6 +505,59 @@ func TestAssignShardErrors(t *testing.T) { assert.ErrorIs(t, err, store.ErrVersionConflict, "Error should be ErrVersionConflict for non-active executor") } +// TestShardMetricsPersistence verifies that shard metrics are preserved on assignment +// when they already exist, and that GetState exposes them. +func TestShardMetricsPersistence(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "exec-metrics" + shardID := "shard-metrics" + + // 1. Setup: ensure executor is ACTIVE + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, store.HeartbeatState{Status: types.ExecutorStatusACTIVE})) + + // 2. Pre-create shard metrics as if coming from prior history + m := store.ShardMetrics{SmoothedLoad: 12.5, LastUpdateTime: 1234, LastMoveTime: 5678} + shardMetricsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardMetricsKey) + require.NoError(t, err) + payload, err := json.Marshal(m) + require.NoError(t, err) + _, err = tc.Client.Put(ctx, shardMetricsKey, string(payload)) + require.NoError(t, err) + + // 3. Assign the shard via AssignShard (should not clobber existing metrics) + require.NoError(t, executorStore.AssignShard(ctx, tc.Namespace, shardID, executorID)) + + // 4. Verify via GetState that metrics are preserved and exposed + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + require.Contains(t, nsState.ShardMetrics, shardID) + updatedMetrics := nsState.ShardMetrics[shardID] + assert.Equal(t, m.SmoothedLoad, updatedMetrics.SmoothedLoad) + assert.Equal(t, m.LastUpdateTime, updatedMetrics.LastUpdateTime) + // This should be greater than the last move time + assert.Greater(t, updatedMetrics.LastMoveTime, m.LastMoveTime) + + // 5. Also ensure assignment recorded correctly + require.Contains(t, nsState.ShardAssignments[executorID].AssignedShards, shardID) +} + +// TestGetShardMetricsForMissingShard verifies GetState does not report metrics for unknown shards. +func TestGetShardMetricsForMissingShard(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // No metrics are written; GetState should not contain unknown shard + st, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + assert.NotContains(t, st.ShardMetrics, "unknown") +} + // --- Test Setup --- func stringStatus(s types.ExecutorStatus) string { From d9ba54da03ce963b8bf5ede6232f9a15dfea4222 Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 22:53:30 +0200 Subject: [PATCH 05/62] fix(shard distributor): modify comment Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/store/etcd/executorstore/etcdstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 77f39746c09..889ff32649d 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -466,7 +466,7 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, // Use a read-modify-write loop to handle concurrent updates safely. for { - // 1. Get the current assigned state of the executor. + // 1. Get the current assigned state of the executor and prepare the shard metrics. resp, err := s.client.Get(ctx, assignedState) if err != nil { return fmt.Errorf("get executor state: %w", err) From 32d2ecdbb6bec5dce803ba84c6ddafb0da878fa6 Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 23:13:47 +0200 Subject: [PATCH 06/62] fix(shard distributor): add atomic check to prevent metrics race Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 889ff32649d..4ec7f7f0568 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -364,11 +364,13 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return fmt.Errorf("build shard metrics key: %w", err) } var shardMetrics store.ShardMetrics + metricsModRevision := int64(0) metricsResp, err := s.client.Get(ctx, shardMetricsKey) if err != nil { return fmt.Errorf("get shard metrics: %w", err) } if len(metricsResp.Kvs) > 0 { + metricsModRevision = metricsResp.Kvs[0].ModRevision if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { return fmt.Errorf("unmarshal shard metrics: %w", err) } @@ -382,6 +384,7 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return fmt.Errorf("marshal shard metrics: %w", err) } ops = append(ops, clientv3.OpPut(shardMetricsKey, string(payload))) + comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardMetricsKey), "=", metricsModRevision)) } // 1. Prepare operations to update executor states and shard ownership, @@ -493,7 +496,9 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, return fmt.Errorf("get shard metrics: %w", err) } now := time.Now().Unix() + metricsModRevision := int64(0) if len(metricsResp.Kvs) > 0 { + metricsModRevision = metricsResp.Kvs[0].ModRevision if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { return fmt.Errorf("unmarshal shard metrics: %w", err) } @@ -525,11 +530,12 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, var comparisons []clientv3.Cmp - // 3. Prepare and commit the transaction with three atomic checks. + // 3. Prepare and commit the transaction with four atomic checks. // a) Check that the executor's status is ACTIVE. comparisons = append(comparisons, clientv3.Compare(clientv3.Value(statusKey), "=", _executorStatusRunningJSON)) - // b) Check that the assigned_state key hasn't been changed by another process. + // b) Check that neither the assigned_state nor shard metrics were modified concurrently. comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(assignedState), "=", modRevision)) + comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardMetricsKey), "=", metricsModRevision)) // c) Check that the cache is up to date. cmp, err := s.shardCache.GetExecutorModRevisionCmp(namespace) if err != nil { From b624a0031ca7f3940f47c9e06ab72e3dd3d2bc4a Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 23:39:56 +0200 Subject: [PATCH 07/62] fix(shard distributor): apply shard metric updates in a second phase to not overload etcd's 128 max ops per txn Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 4ec7f7f0568..8306e1d2c14 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -354,6 +354,12 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, } } now := time.Now().Unix() + type metricsUpdate struct { + key string + value string + modRevision int64 + } + var metricsUpdates []metricsUpdate for shardID, newOwner := range newAssignments { if oldOwner, ok := currentAssignments[shardID]; ok && oldOwner == newOwner { continue @@ -383,8 +389,11 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, if err != nil { return fmt.Errorf("marshal shard metrics: %w", err) } - ops = append(ops, clientv3.OpPut(shardMetricsKey, string(payload))) - comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardMetricsKey), "=", metricsModRevision)) + metricsUpdates = append(metricsUpdates, metricsUpdate{ + key: shardMetricsKey, + value: string(payload), + modRevision: metricsModRevision, + }) } // 1. Prepare operations to update executor states and shard ownership, @@ -450,6 +459,20 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return fmt.Errorf("%w: transaction failed, a shard may have been concurrently assigned", store.ErrVersionConflict) } + // Apply shard metrics updates outside the main transaction to stay within etcd's max operations per txn. + for _, update := range metricsUpdates { + txnResp, err := s.client.Txn(ctx). + If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). + Then(clientv3.OpPut(update.key, update.value)). + Commit() + if err != nil { + return fmt.Errorf("commit shard metrics update: %w", err) + } + if !txnResp.Succeeded { + return fmt.Errorf("%w: shard metrics were concurrently updated", store.ErrVersionConflict) + } + } + return nil } From aad7b2e34781af1edcd1d1c140fe559a64857cd2 Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Mon, 20 Oct 2025 01:37:39 +0200 Subject: [PATCH 08/62] feat(shard distributor): move shard metric updates out of AssignShards txn and retry monotonically Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 124 ++++++++++++++---- 1 file changed, 95 insertions(+), 29 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 8306e1d2c14..5a51e42712a 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -34,6 +34,18 @@ type executorStoreImpl struct { shardCache *shardcache.ShardToExecutorCache } +// shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard +// after the main transaction in AssignShards for exec state. +// Retains metrics to safely merge concurrent updates before retrying. +type shardMetricsUpdate struct { + key string + shardID string + metrics store.ShardMetrics + modRevision int64 + desiredLastMove int64 // intended LastMoveTime for this update + defaultLastUpdate int64 +} + // ExecutorStoreParams defines the dependencies for the etcd store, for use with fx. type ExecutorStoreParams struct { fx.In @@ -325,7 +337,7 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, // Compute shard moves to update last_move_time metrics when ownership changes. // Read current assignments for the namespace and compare with the new state. - // Any concurrent change will be caught by the revision comparisons below. + // Concurrent changes will be caught by the revision comparisons later. currentAssignments := make(map[string]string) // shardID -> executorID executorPrefix := etcdkeys.BuildExecutorPrefix(s.prefix, namespace) resp, err := s.client.Get(ctx, executorPrefix, clientv3.WithPrefix()) @@ -354,17 +366,13 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, } } now := time.Now().Unix() - type metricsUpdate struct { - key string - value string - modRevision int64 - } - var metricsUpdates []metricsUpdate + // Collect metric updates now so we can apply them after committing the main transaction. + var metricsUpdates []shardMetricsUpdate for shardID, newOwner := range newAssignments { if oldOwner, ok := currentAssignments[shardID]; ok && oldOwner == newOwner { continue } - // Owner changed or new shard: update metrics.last_move_time while preserving existing metrics when present. + // For a new or moved shard, update last_move_time while keeping existing metrics if available. shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) if err != nil { return fmt.Errorf("build shard metrics key: %w", err) @@ -384,15 +392,15 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, shardMetrics.SmoothedLoad = 0 shardMetrics.LastUpdateTime = now } - shardMetrics.LastMoveTime = now - payload, err := json.Marshal(shardMetrics) - if err != nil { - return fmt.Errorf("marshal shard metrics: %w", err) - } - metricsUpdates = append(metricsUpdates, metricsUpdate{ - key: shardMetricsKey, - value: string(payload), - modRevision: metricsModRevision, + // Do not set LastMoveTime here, it will be applied later to avoid overwriting + // a newer timestamp if a concurrent rebalance has already updated it. + metricsUpdates = append(metricsUpdates, shardMetricsUpdate{ + key: shardMetricsKey, + shardID: shardID, + metrics: shardMetrics, + modRevision: metricsModRevision, + desiredLastMove: now, + defaultLastUpdate: shardMetrics.LastUpdateTime, }) } @@ -460,22 +468,80 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, } // Apply shard metrics updates outside the main transaction to stay within etcd's max operations per txn. - for _, update := range metricsUpdates { - txnResp, err := s.client.Txn(ctx). - If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). - Then(clientv3.OpPut(update.key, update.value)). - Commit() - if err != nil { - return fmt.Errorf("commit shard metrics update: %w", err) - } - if !txnResp.Succeeded { - return fmt.Errorf("%w: shard metrics were concurrently updated", store.ErrVersionConflict) - } - } + s.applyShardMetricsUpdates(ctx, namespace, metricsUpdates) return nil } +// applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. +// Decided to run these writes outside the primary transaction +// so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry +// logs failures instead of failing the main assignment. +func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { + for i := range updates { + update := &updates[i] + + for { + // If a newer rebalance already set a later LastMoveTime, there's nothing left for this iteration. + if update.metrics.LastMoveTime >= update.desiredLastMove { + break + } + + update.metrics.LastMoveTime = update.desiredLastMove + + payload, err := json.Marshal(update.metrics) + if err != nil { + // Log and move on. failing metrics formatting should not invalidate the finished assignment. + s.logger.Warn("failed to marshal shard metrics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + txnResp, err := s.client.Txn(ctx). + If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). + Then(clientv3.OpPut(update.key, string(payload))). + Commit() + if err != nil { + // log and abandon this shard rather than propagating an error after assignments commit. + s.logger.Warn("failed to commit shard metrics update after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + if txnResp.Succeeded { + break + } + + if ctx.Err() != nil { + s.logger.Warn("context canceled while updating shard metrics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(ctx.Err())) + return + } + + // Another writer beat us. pull the latest metrics so we can merge their view and retry. + metricsResp, err := s.client.Get(ctx, update.key) + if err != nil { + // Unable to observe the conflicting write, so we skip this shard and keep the assignment result. + s.logger.Warn("failed to refresh shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + update.modRevision = 0 + if len(metricsResp.Kvs) > 0 { + update.modRevision = metricsResp.Kvs[0].ModRevision + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &update.metrics); err != nil { + // If the value is corrupt we cannot safely merge, so we abandon this shard's metrics update. + s.logger.Warn("failed to unmarshal shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + } else { + update.metrics = store.ShardMetrics{ + SmoothedLoad: 0, + LastUpdateTime: update.defaultLastUpdate, + } + update.modRevision = 0 + } + } + } +} + func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, executorID string) error { assignedState, err := etcdkeys.BuildExecutorKey(s.prefix, namespace, executorID, etcdkeys.ExecutorAssignedStateKey) if err != nil { From 6360f8a4eca6330b291ee790ff1613c62819faed Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Mon, 20 Oct 2025 12:17:21 +0200 Subject: [PATCH 09/62] fix(shard distributor): keep NamespaceState revisions tied to assignments Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 5a51e42712a..ed147701ab0 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -276,17 +276,11 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st shardMetrics[shardID] = shardMetric } - // Compute a global revision that reflects both executor and shard state. - globalRevision := resp.Header.Revision - if shardResp.Header.Revision > globalRevision { - globalRevision = shardResp.Header.Revision - } - return &store.NamespaceState{ Executors: heartbeatStates, ShardMetrics: shardMetrics, ShardAssignments: assignedStates, - GlobalRevision: globalRevision, + GlobalRevision: resp.Header.Revision, }, nil } From 1536d0aff6dca4372fa19efc7a23ad54fb4338f4 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:24:36 +0200 Subject: [PATCH 10/62] refactor(shard distributor): use shard cache and clock for preparing shard metrics, move out to staging to separate function Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 142 +++++++++--------- 1 file changed, 70 insertions(+), 72 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index ed147701ab0..cac714d3a12 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -14,6 +14,7 @@ import ( clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/fx" + "github.com/uber/cadence/common/clock" "github.com/uber/cadence/common/log" "github.com/uber/cadence/common/log/tag" "github.com/uber/cadence/common/types" @@ -32,11 +33,11 @@ type executorStoreImpl struct { prefix string logger log.Logger shardCache *shardcache.ShardToExecutorCache + timeSource clock.TimeSource } // shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard // after the main transaction in AssignShards for exec state. -// Retains metrics to safely merge concurrent updates before retrying. type shardMetricsUpdate struct { key string shardID string @@ -50,10 +51,11 @@ type shardMetricsUpdate struct { type ExecutorStoreParams struct { fx.In - Client *clientv3.Client `optional:"true"` - Cfg config.ShardDistribution - Lifecycle fx.Lifecycle - Logger log.Logger + Client *clientv3.Client `optional:"true"` + Cfg config.ShardDistribution + Lifecycle fx.Lifecycle + Logger log.Logger + TimeSource clock.TimeSource } // NewStore creates a new etcd-backed store and provides it to the fx application. @@ -86,11 +88,17 @@ func NewStore(p ExecutorStoreParams) (store.Store, error) { shardCache := shardcache.NewShardToExecutorCache(etcdCfg.Prefix, etcdClient, p.Logger) + timeSource := p.TimeSource + if timeSource == nil { + timeSource = clock.NewRealTimeSource() + } + store := &executorStoreImpl{ client: etcdClient, prefix: etcdCfg.Prefix, logger: p.Logger, shardCache: shardCache, + timeSource: timeSource, } p.Lifecycle.Append(fx.StartStopHook(store.Start, store.Stop)) @@ -329,73 +337,9 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, var ops []clientv3.Op var comparisons []clientv3.Cmp - // Compute shard moves to update last_move_time metrics when ownership changes. - // Read current assignments for the namespace and compare with the new state. - // Concurrent changes will be caught by the revision comparisons later. - currentAssignments := make(map[string]string) // shardID -> executorID - executorPrefix := etcdkeys.BuildExecutorPrefix(s.prefix, namespace) - resp, err := s.client.Get(ctx, executorPrefix, clientv3.WithPrefix()) + metricsUpdates, err := s.prepareShardMetricsUpdates(ctx, namespace, request.NewState.ShardAssignments) if err != nil { - return fmt.Errorf("get current assignments: %w", err) - } - for _, kv := range resp.Kvs { - executorID, keyType, keyErr := etcdkeys.ParseExecutorKey(s.prefix, namespace, string(kv.Key)) - if keyErr != nil || keyType != etcdkeys.ExecutorAssignedStateKey { - continue - } - var state store.AssignedState - if err := json.Unmarshal(kv.Value, &state); err != nil { - return fmt.Errorf("unmarshal current assigned state: %w", err) - } - for shardID := range state.AssignedShards { - currentAssignments[shardID] = executorID - } - } - - // Build new owner map and detect moved shards. - newAssignments := make(map[string]string) - for executorID, state := range request.NewState.ShardAssignments { - for shardID := range state.AssignedShards { - newAssignments[shardID] = executorID - } - } - now := time.Now().Unix() - // Collect metric updates now so we can apply them after committing the main transaction. - var metricsUpdates []shardMetricsUpdate - for shardID, newOwner := range newAssignments { - if oldOwner, ok := currentAssignments[shardID]; ok && oldOwner == newOwner { - continue - } - // For a new or moved shard, update last_move_time while keeping existing metrics if available. - shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) - if err != nil { - return fmt.Errorf("build shard metrics key: %w", err) - } - var shardMetrics store.ShardMetrics - metricsModRevision := int64(0) - metricsResp, err := s.client.Get(ctx, shardMetricsKey) - if err != nil { - return fmt.Errorf("get shard metrics: %w", err) - } - if len(metricsResp.Kvs) > 0 { - metricsModRevision = metricsResp.Kvs[0].ModRevision - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { - return fmt.Errorf("unmarshal shard metrics: %w", err) - } - } else { - shardMetrics.SmoothedLoad = 0 - shardMetrics.LastUpdateTime = now - } - // Do not set LastMoveTime here, it will be applied later to avoid overwriting - // a newer timestamp if a concurrent rebalance has already updated it. - metricsUpdates = append(metricsUpdates, shardMetricsUpdate{ - key: shardMetricsKey, - shardID: shardID, - metrics: shardMetrics, - modRevision: metricsModRevision, - desiredLastMove: now, - defaultLastUpdate: shardMetrics.LastUpdateTime, - }) + return fmt.Errorf("prepare shard metrics: %w", err) } // 1. Prepare operations to update executor states and shard ownership, @@ -467,6 +411,60 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } +func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { + var updates []shardMetricsUpdate + + for executorID, state := range newAssignments { + for shardID := range state.AssignedShards { + now := s.timeSource.Now().Unix() + + oldOwner, err := s.shardCache.GetShardOwner(ctx, namespace, shardID) + if err != nil && !errors.Is(err, store.ErrShardNotFound) { + return nil, fmt.Errorf("lookup cached shard owner: %w", err) + } + + // we should just skip if the owner hasn't changed + if err == nil && oldOwner == executorID { + continue + } + + shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + if err != nil { + return nil, fmt.Errorf("build shard metrics key: %w", err) + } + + metricsResp, err := s.client.Get(ctx, shardMetricsKey) + if err != nil { + return nil, fmt.Errorf("get shard metrics: %w", err) + } + + metrics := store.ShardMetrics{} + modRevision := int64(0) + + if len(metricsResp.Kvs) > 0 { + modRevision = metricsResp.Kvs[0].ModRevision + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { + return nil, fmt.Errorf("unmarshal shard metrics: %w", err) + } + } else { + metrics.SmoothedLoad = 0 + metrics.LastUpdateTime = now + } + + updates = append(updates, shardMetricsUpdate{ + key: shardMetricsKey, + shardID: shardID, + metrics: metrics, + modRevision: modRevision, + desiredLastMove: now, + defaultLastUpdate: metrics.LastUpdateTime, + }) + } + } + + return updates, nil +} + // applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. // Decided to run these writes outside the primary transaction // so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry @@ -578,7 +576,7 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, if err != nil { return fmt.Errorf("get shard metrics: %w", err) } - now := time.Now().Unix() + now := s.timeSource.Now().Unix() metricsModRevision := int64(0) if len(metricsResp.Kvs) > 0 { metricsModRevision = metricsResp.Kvs[0].ModRevision From f316fbfe39077bee7e8b38a10f33d83acbc5dbd6 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 22 Oct 2025 18:32:33 +0200 Subject: [PATCH 11/62] test(shard distributor): BuildShardPrefix, BuildShardKey, ParseShardKey Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/etcdkeys/etcdkeys_test.go | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go index c98a01f2b57..1173cc89e83 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go @@ -16,6 +16,11 @@ func TestBuildExecutorPrefix(t *testing.T) { assert.Equal(t, "/cadence/test-ns/executors/", got) } +func TestBuildShardPrefix(t *testing.T) { + got := BuildShardPrefix("/cadence", "test-ns") + assert.Equal(t, "/cadence/test-ns/shards/", got) +} + func TestBuildExecutorKey(t *testing.T) { got, err := BuildExecutorKey("/cadence", "test-ns", "exec-1", "heartbeat") assert.NoError(t, err) @@ -27,6 +32,17 @@ func TestBuildExecutorKeyFail(t *testing.T) { assert.ErrorContains(t, err, "invalid key type: invalid") } +func TestBuildShardKey(t *testing.T) { + got, err := BuildShardKey("/cadence", "test-ns", "shard-1", ShardMetricsKey) + assert.NoError(t, err) + assert.Equal(t, "/cadence/test-ns/shards/shard-1/metrics", got) +} + +func TestBuildShardKeyFail(t *testing.T) { + _, err := BuildShardKey("/cadence", "test-ns", "shard-1", "not-valid") + assert.ErrorContains(t, err, "invalid shard key type: not-valid") +} + func TestParseExecutorKey(t *testing.T) { // Valid key executorID, keyType, err := ParseExecutorKey("/cadence", "test-ns", "/cadence/test-ns/executors/exec-1/heartbeat") @@ -42,3 +58,19 @@ func TestParseExecutorKey(t *testing.T) { _, _, err = ParseExecutorKey("/cadence", "test-ns", "/cadence/test-ns/executors/exec-1/heartbeat/extra") assert.ErrorContains(t, err, "unexpected key format: /cadence/test-ns/executors/exec-1/heartbeat/extra") } + +func TestParseShardKey(t *testing.T) { + // Valid key + shardID, keyType, err := ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/metrics") + assert.NoError(t, err) + assert.Equal(t, "shard-7", shardID) + assert.Equal(t, ShardMetricsKey, keyType) + + // Prefix missing + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/other/shards/shard-7/metrics") + assert.ErrorContains(t, err, "key '/cadence/other/shards/shard-7/metrics' does not have expected prefix '/cadence/test-ns/shards/'") + + // Unexpected format + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/metrics/extra") + assert.ErrorContains(t, err, "unexpected shard key format: /cadence/test-ns/shards/shard-7/metrics/extra") +} From 4524da9ab15a284944b70651681ba54ca1b8921a Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Fri, 24 Oct 2025 00:35:46 +0200 Subject: [PATCH 12/62] feat(shard distributor): simplify shard metrics updates Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 105 +++++------------- 1 file changed, 29 insertions(+), 76 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index cac714d3a12..3dc2fc6f73c 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -39,12 +39,10 @@ type executorStoreImpl struct { // shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard // after the main transaction in AssignShards for exec state. type shardMetricsUpdate struct { - key string - shardID string - metrics store.ShardMetrics - modRevision int64 - desiredLastMove int64 // intended LastMoveTime for this update - defaultLastUpdate int64 + key string + shardID string + metrics store.ShardMetrics + desiredLastMove int64 // intended LastMoveTime for this update } // ExecutorStoreParams defines the dependencies for the etcd store, for use with fx. @@ -439,10 +437,8 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name } metrics := store.ShardMetrics{} - modRevision := int64(0) if len(metricsResp.Kvs) > 0 { - modRevision = metricsResp.Kvs[0].ModRevision if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { return nil, fmt.Errorf("unmarshal shard metrics: %w", err) } @@ -452,12 +448,10 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name } updates = append(updates, shardMetricsUpdate{ - key: shardMetricsKey, - shardID: shardID, - metrics: metrics, - modRevision: modRevision, - desiredLastMove: now, - defaultLastUpdate: metrics.LastUpdateTime, + key: shardMetricsKey, + shardID: shardID, + metrics: metrics, + desiredLastMove: now, }) } } @@ -465,71 +459,30 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name return updates, nil } -// applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. -// Decided to run these writes outside the primary transaction -// so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry -// logs failures instead of failing the main assignment. +// applyShardMetricsUpdates updates shard metrics. +// Is intentionally made tolerant of failures since the data is telemetry only. func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { - for i := range updates { - update := &updates[i] + for _, update := range updates { + update.metrics.LastMoveTime = update.desiredLastMove - for { - // If a newer rebalance already set a later LastMoveTime, there's nothing left for this iteration. - if update.metrics.LastMoveTime >= update.desiredLastMove { - break - } - - update.metrics.LastMoveTime = update.desiredLastMove - - payload, err := json.Marshal(update.metrics) - if err != nil { - // Log and move on. failing metrics formatting should not invalidate the finished assignment. - s.logger.Warn("failed to marshal shard metrics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - - txnResp, err := s.client.Txn(ctx). - If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). - Then(clientv3.OpPut(update.key, string(payload))). - Commit() - if err != nil { - // log and abandon this shard rather than propagating an error after assignments commit. - s.logger.Warn("failed to commit shard metrics update after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - - if txnResp.Succeeded { - break - } - - if ctx.Err() != nil { - s.logger.Warn("context canceled while updating shard metrics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(ctx.Err())) - return - } - - // Another writer beat us. pull the latest metrics so we can merge their view and retry. - metricsResp, err := s.client.Get(ctx, update.key) - if err != nil { - // Unable to observe the conflicting write, so we skip this shard and keep the assignment result. - s.logger.Warn("failed to refresh shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } + payload, err := json.Marshal(update.metrics) + if err != nil { + s.logger.Warn( + "failed to marshal shard metrics after assignment", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) + continue + } - update.modRevision = 0 - if len(metricsResp.Kvs) > 0 { - update.modRevision = metricsResp.Kvs[0].ModRevision - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &update.metrics); err != nil { - // If the value is corrupt we cannot safely merge, so we abandon this shard's metrics update. - s.logger.Warn("failed to unmarshal shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - } else { - update.metrics = store.ShardMetrics{ - SmoothedLoad: 0, - LastUpdateTime: update.defaultLastUpdate, - } - update.modRevision = 0 - } + if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { + s.logger.Warn( + "failed to update shard metrics", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) } } } From 126f7257342077e37213eba394d0f5ddef324699 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:18:38 +0200 Subject: [PATCH 13/62] refactor(shard distributor): ShardMetrics renamed to ShardStatistics. And more idiomatic naming of collection vs singular type Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/etcdkeys/etcdkeys.go | 4 +- .../store/etcd/etcdkeys/etcdkeys_test.go | 16 +-- .../store/etcd/executorstore/etcdstore.go | 102 +++++++++--------- .../etcd/executorstore/etcdstore_test.go | 34 +++--- service/sharddistributor/store/state.go | 4 +- 5 files changed, 80 insertions(+), 80 deletions(-) diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go index 998e82af078..906174d0e4d 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go @@ -11,7 +11,7 @@ const ( ExecutorReportedShardsKey = "reported_shards" ExecutorAssignedStateKey = "assigned_state" ShardAssignedKey = "assigned" - ShardMetricsKey = "metrics" + ShardStatisticsKey = "statistics" ) var validKeyTypes = []string{ @@ -64,7 +64,7 @@ func BuildShardPrefix(prefix string, namespace string) string { } func BuildShardKey(prefix string, namespace, shardID, keyType string) (string, error) { - if keyType != ShardAssignedKey && keyType != ShardMetricsKey { + if keyType != ShardAssignedKey && keyType != ShardStatisticsKey { return "", fmt.Errorf("invalid shard key type: %s", keyType) } return fmt.Sprintf("%s%s/%s", BuildShardPrefix(prefix, namespace), shardID, keyType), nil diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go index 1173cc89e83..b86cef4e095 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go @@ -33,9 +33,9 @@ func TestBuildExecutorKeyFail(t *testing.T) { } func TestBuildShardKey(t *testing.T) { - got, err := BuildShardKey("/cadence", "test-ns", "shard-1", ShardMetricsKey) + got, err := BuildShardKey("/cadence", "test-ns", "shard-1", ShardStatisticsKey) assert.NoError(t, err) - assert.Equal(t, "/cadence/test-ns/shards/shard-1/metrics", got) + assert.Equal(t, "/cadence/test-ns/shards/shard-1/statistics", got) } func TestBuildShardKeyFail(t *testing.T) { @@ -61,16 +61,16 @@ func TestParseExecutorKey(t *testing.T) { func TestParseShardKey(t *testing.T) { // Valid key - shardID, keyType, err := ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/metrics") + shardID, keyType, err := ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/statistics") assert.NoError(t, err) assert.Equal(t, "shard-7", shardID) - assert.Equal(t, ShardMetricsKey, keyType) + assert.Equal(t, ShardStatisticsKey, keyType) // Prefix missing - _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/other/shards/shard-7/metrics") - assert.ErrorContains(t, err, "key '/cadence/other/shards/shard-7/metrics' does not have expected prefix '/cadence/test-ns/shards/'") + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/other/shards/shard-7/statistics") + assert.ErrorContains(t, err, "key '/cadence/other/shards/shard-7/statistics' does not have expected prefix '/cadence/test-ns/shards/'") // Unexpected format - _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/metrics/extra") - assert.ErrorContains(t, err, "unexpected shard key format: /cadence/test-ns/shards/shard-7/metrics/extra") + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/statistics/extra") + assert.ErrorContains(t, err, "unexpected shard key format: /cadence/test-ns/shards/shard-7/statistics/extra") } diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 3dc2fc6f73c..4867291a098 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -36,12 +36,12 @@ type executorStoreImpl struct { timeSource clock.TimeSource } -// shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard +// shardMetricsUpdate tracks the etcd key and statistics used to update a shard // after the main transaction in AssignShards for exec state. type shardMetricsUpdate struct { key string shardID string - metrics store.ShardMetrics + metrics store.ShardStatistics desiredLastMove int64 // intended LastMoveTime for this update } @@ -219,7 +219,7 @@ func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*store.NamespaceState, error) { heartbeatStates := make(map[string]store.HeartbeatState) assignedStates := make(map[string]store.AssignedState) - shardMetrics := make(map[string]store.ShardMetrics) + shardStats := make(map[string]store.ShardStatistics) executorPrefix := etcdkeys.BuildExecutorPrefix(s.prefix, namespace) resp, err := s.client.Get(ctx, executorPrefix, clientv3.WithPrefix()) @@ -272,19 +272,19 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st if err != nil { continue } - if shardKeyType != etcdkeys.ShardMetricsKey { + if shardKeyType != etcdkeys.ShardStatisticsKey { continue } - var shardMetric store.ShardMetrics - if err := json.Unmarshal(kv.Value, &shardMetric); err != nil { + var shardStatistic store.ShardStatistics + if err := json.Unmarshal(kv.Value, &shardStatistic); err != nil { continue } - shardMetrics[shardID] = shardMetric + shardStats[shardID] = shardStatistic } return &store.NamespaceState{ Executors: heartbeatStates, - ShardMetrics: shardMetrics, + ShardStats: shardStats, ShardAssignments: assignedStates, GlobalRevision: resp.Header.Revision, }, nil @@ -335,9 +335,9 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, var ops []clientv3.Op var comparisons []clientv3.Cmp - metricsUpdates, err := s.prepareShardMetricsUpdates(ctx, namespace, request.NewState.ShardAssignments) + statsUpdates, err := s.prepareShardStatisticsUpdates(ctx, namespace, request.NewState.ShardAssignments) if err != nil { - return fmt.Errorf("prepare shard metrics: %w", err) + return fmt.Errorf("prepare shard statistics: %w", err) } // 1. Prepare operations to update executor states and shard ownership, @@ -403,13 +403,13 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return fmt.Errorf("%w: transaction failed, a shard may have been concurrently assigned", store.ErrVersionConflict) } - // Apply shard metrics updates outside the main transaction to stay within etcd's max operations per txn. - s.applyShardMetricsUpdates(ctx, namespace, metricsUpdates) + // Apply shard statistics updates outside the main transaction to stay within etcd's max operations per txn. + s.applyShardStatisticsUpdates(ctx, namespace, statsUpdates) return nil } -func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { +func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { var updates []shardMetricsUpdate for executorID, state := range newAssignments { @@ -426,31 +426,31 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name continue } - shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + shardStatisticsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) if err != nil { - return nil, fmt.Errorf("build shard metrics key: %w", err) + return nil, fmt.Errorf("build shard statistics key: %w", err) } - metricsResp, err := s.client.Get(ctx, shardMetricsKey) + statsResp, err := s.client.Get(ctx, shardStatisticsKey) if err != nil { - return nil, fmt.Errorf("get shard metrics: %w", err) + return nil, fmt.Errorf("get shard statistics: %w", err) } - metrics := store.ShardMetrics{} + stats := store.ShardStatistics{} - if len(metricsResp.Kvs) > 0 { - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { - return nil, fmt.Errorf("unmarshal shard metrics: %w", err) + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + return nil, fmt.Errorf("unmarshal shard statistics: %w", err) } } else { - metrics.SmoothedLoad = 0 - metrics.LastUpdateTime = now + stats.SmoothedLoad = 0 + stats.LastUpdateTime = now } updates = append(updates, shardMetricsUpdate{ - key: shardMetricsKey, + key: shardStatisticsKey, shardID: shardID, - metrics: metrics, + metrics: stats, desiredLastMove: now, }) } @@ -459,16 +459,16 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name return updates, nil } -// applyShardMetricsUpdates updates shard metrics. +// applyShardStatisticsUpdates updates shard statistics. // Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { +func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { for _, update := range updates { update.metrics.LastMoveTime = update.desiredLastMove payload, err := json.Marshal(update.metrics) if err != nil { s.logger.Warn( - "failed to marshal shard metrics after assignment", + "failed to marshal shard statistics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err), @@ -478,7 +478,7 @@ func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namesp if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { s.logger.Warn( - "failed to update shard metrics", + "failed to update shard statistics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err), @@ -496,21 +496,21 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, if err != nil { return fmt.Errorf("build executor status key: %w", err) } - shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + shardStatsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) if err != nil { - return fmt.Errorf("build shard metrics key: %w", err) + return fmt.Errorf("build shard statistics key: %w", err) } // Use a read-modify-write loop to handle concurrent updates safely. for { - // 1. Get the current assigned state of the executor and prepare the shard metrics. + // 1. Get the current assigned state of the executor and prepare the shard statistics. resp, err := s.client.Get(ctx, assignedState) if err != nil { return fmt.Errorf("get executor state: %w", err) } var state store.AssignedState - var shardMetrics store.ShardMetrics + var shardStats store.ShardStatistics modRevision := int64(0) // A revision of 0 means the key doesn't exist yet. if len(resp.Kvs) > 0 { @@ -525,26 +525,26 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, state.AssignedShards = make(map[string]*types.ShardAssignment) } - metricsResp, err := s.client.Get(ctx, shardMetricsKey) + statsResp, err := s.client.Get(ctx, shardStatsKey) if err != nil { - return fmt.Errorf("get shard metrics: %w", err) + return fmt.Errorf("get shard statistics: %w", err) } now := s.timeSource.Now().Unix() - metricsModRevision := int64(0) - if len(metricsResp.Kvs) > 0 { - metricsModRevision = metricsResp.Kvs[0].ModRevision - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &shardMetrics); err != nil { - return fmt.Errorf("unmarshal shard metrics: %w", err) + statsModRevision := int64(0) + if len(statsResp.Kvs) > 0 { + statsModRevision = statsResp.Kvs[0].ModRevision + if err := json.Unmarshal(statsResp.Kvs[0].Value, &shardStats); err != nil { + return fmt.Errorf("unmarshal shard statistics: %w", err) } - // Metrics already exist, update the last move time. + // Statistics already exist, update the last move time. // This can happen if the shard was previously assigned to an executor, and a lookup happens after the executor is deleted, // AssignShard is then called to assign the shard to a new executor. - shardMetrics.LastMoveTime = now + shardStats.LastMoveTime = now } else { - // Metrics don't exist, initialize them. - shardMetrics.SmoothedLoad = 0 - shardMetrics.LastUpdateTime = now - shardMetrics.LastMoveTime = now + // Statistics don't exist, initialize them. + shardStats.SmoothedLoad = 0 + shardStats.LastUpdateTime = now + shardStats.LastMoveTime = now } // 2. Modify the state in memory, adding the new shard if it's not already there. @@ -557,9 +557,9 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, return fmt.Errorf("marshal new assigned state: %w", err) } - newMetricsValue, err := json.Marshal(shardMetrics) + newStatsValue, err := json.Marshal(shardStats) if err != nil { - return fmt.Errorf("marshal new shard metrics: %w", err) + return fmt.Errorf("marshal new shard statistics: %w", err) } var comparisons []clientv3.Cmp @@ -567,9 +567,9 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, // 3. Prepare and commit the transaction with four atomic checks. // a) Check that the executor's status is ACTIVE. comparisons = append(comparisons, clientv3.Compare(clientv3.Value(statusKey), "=", _executorStatusRunningJSON)) - // b) Check that neither the assigned_state nor shard metrics were modified concurrently. + // b) Check that neither the assigned_state nor shard statistics were modified concurrently. comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(assignedState), "=", modRevision)) - comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardMetricsKey), "=", metricsModRevision)) + comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardStatsKey), "=", statsModRevision)) // c) Check that the cache is up to date. cmp, err := s.shardCache.GetExecutorModRevisionCmp(namespace) if err != nil { @@ -590,7 +590,7 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, If(comparisons...). Then( clientv3.OpPut(assignedState, string(newStateValue)), - clientv3.OpPut(shardMetricsKey, string(newMetricsValue)), + clientv3.OpPut(shardStatsKey, string(newStatsValue)), ). Commit() diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index 3ff69ef60cb..e42eb160697 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -505,27 +505,27 @@ func TestAssignShardErrors(t *testing.T) { assert.ErrorIs(t, err, store.ErrVersionConflict, "Error should be ErrVersionConflict for non-active executor") } -// TestShardMetricsPersistence verifies that shard metrics are preserved on assignment +// TestShardStatisticsPersistence verifies that shard statistics are preserved on assignment // when they already exist, and that GetState exposes them. -func TestShardMetricsPersistence(t *testing.T) { +func TestShardStatisticsPersistence(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - executorID := "exec-metrics" - shardID := "shard-metrics" + executorID := "exec-stats" + shardID := "shard-stats" // 1. Setup: ensure executor is ACTIVE require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, store.HeartbeatState{Status: types.ExecutorStatusACTIVE})) - // 2. Pre-create shard metrics as if coming from prior history - m := store.ShardMetrics{SmoothedLoad: 12.5, LastUpdateTime: 1234, LastMoveTime: 5678} - shardMetricsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardMetricsKey) + // 2. Pre-create shard statistics as if coming from prior history + stats := store.ShardStatistics{SmoothedLoad: 12.5, LastUpdateTime: 1234, LastMoveTime: 5678} + shardStatsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardStatisticsKey) require.NoError(t, err) - payload, err := json.Marshal(m) + payload, err := json.Marshal(stats) require.NoError(t, err) - _, err = tc.Client.Put(ctx, shardMetricsKey, string(payload)) + _, err = tc.Client.Put(ctx, shardStatsKey, string(payload)) require.NoError(t, err) // 3. Assign the shard via AssignShard (should not clobber existing metrics) @@ -534,19 +534,19 @@ func TestShardMetricsPersistence(t *testing.T) { // 4. Verify via GetState that metrics are preserved and exposed nsState, err := executorStore.GetState(ctx, tc.Namespace) require.NoError(t, err) - require.Contains(t, nsState.ShardMetrics, shardID) - updatedMetrics := nsState.ShardMetrics[shardID] - assert.Equal(t, m.SmoothedLoad, updatedMetrics.SmoothedLoad) - assert.Equal(t, m.LastUpdateTime, updatedMetrics.LastUpdateTime) + require.Contains(t, nsState.ShardStats, shardID) + updatedStats := nsState.ShardStats[shardID] + assert.Equal(t, stats.SmoothedLoad, updatedStats.SmoothedLoad) + assert.Equal(t, stats.LastUpdateTime, updatedStats.LastUpdateTime) // This should be greater than the last move time - assert.Greater(t, updatedMetrics.LastMoveTime, m.LastMoveTime) + assert.Greater(t, updatedStats.LastMoveTime, stats.LastMoveTime) // 5. Also ensure assignment recorded correctly require.Contains(t, nsState.ShardAssignments[executorID].AssignedShards, shardID) } -// TestGetShardMetricsForMissingShard verifies GetState does not report metrics for unknown shards. -func TestGetShardMetricsForMissingShard(t *testing.T) { +// TestGetShardStatisticsForMissingShard verifies GetState does not report statistics for unknown shards. +func TestGetShardStatisticsForMissingShard(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) @@ -555,7 +555,7 @@ func TestGetShardMetricsForMissingShard(t *testing.T) { // No metrics are written; GetState should not contain unknown shard st, err := executorStore.GetState(ctx, tc.Namespace) require.NoError(t, err) - assert.NotContains(t, st.ShardMetrics, "unknown") + assert.NotContains(t, st.ShardStats, "unknown") } // --- Test Setup --- diff --git a/service/sharddistributor/store/state.go b/service/sharddistributor/store/state.go index 455f48e1e2a..36222485a56 100644 --- a/service/sharddistributor/store/state.go +++ b/service/sharddistributor/store/state.go @@ -18,7 +18,7 @@ type AssignedState struct { type NamespaceState struct { Executors map[string]HeartbeatState - ShardMetrics map[string]ShardMetrics + ShardStats map[string]ShardStatistics ShardAssignments map[string]AssignedState GlobalRevision int64 } @@ -27,7 +27,7 @@ type ShardState struct { ExecutorID string } -type ShardMetrics struct { +type ShardStatistics struct { SmoothedLoad float64 `json:"smoothed_load"` // EWMA of shard load that persists across executor changes LastUpdateTime int64 `json:"last_update_time"` // heartbeat timestamp that last updated the EWMA LastMoveTime int64 `json:"last_move_time"` // timestamp for the latest reassignment, used for cooldowns From cc53f68425447fc595d707a5d5f77429e330cc99 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Sat, 25 Oct 2025 21:32:05 +0200 Subject: [PATCH 14/62] test(shard distributor): small changes to shard key tests s.t. they look more like executor key tests Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/etcdkeys/etcdkeys_test.go | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go index b86cef4e095..7589fd2ba80 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys_test.go @@ -33,14 +33,14 @@ func TestBuildExecutorKeyFail(t *testing.T) { } func TestBuildShardKey(t *testing.T) { - got, err := BuildShardKey("/cadence", "test-ns", "shard-1", ShardStatisticsKey) + got, err := BuildShardKey("/cadence", "test-ns", "shard-1", "statistics") assert.NoError(t, err) assert.Equal(t, "/cadence/test-ns/shards/shard-1/statistics", got) } func TestBuildShardKeyFail(t *testing.T) { - _, err := BuildShardKey("/cadence", "test-ns", "shard-1", "not-valid") - assert.ErrorContains(t, err, "invalid shard key type: not-valid") + _, err := BuildShardKey("/cadence", "test-ns", "shard-1", "invalid") + assert.ErrorContains(t, err, "invalid shard key type: invalid") } func TestParseExecutorKey(t *testing.T) { @@ -61,16 +61,16 @@ func TestParseExecutorKey(t *testing.T) { func TestParseShardKey(t *testing.T) { // Valid key - shardID, keyType, err := ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/statistics") + shardID, keyType, err := ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-1/statistics") assert.NoError(t, err) - assert.Equal(t, "shard-7", shardID) - assert.Equal(t, ShardStatisticsKey, keyType) + assert.Equal(t, "shard-1", shardID) + assert.Equal(t, "statistics", keyType) // Prefix missing - _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/other/shards/shard-7/statistics") - assert.ErrorContains(t, err, "key '/cadence/other/shards/shard-7/statistics' does not have expected prefix '/cadence/test-ns/shards/'") + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/other/shards/shard-1/statistics") + assert.ErrorContains(t, err, "key '/cadence/other/shards/shard-1/statistics' does not have expected prefix '/cadence/test-ns/shards/'") // Unexpected format - _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-7/statistics/extra") - assert.ErrorContains(t, err, "unexpected shard key format: /cadence/test-ns/shards/shard-7/statistics/extra") + _, _, err = ParseShardKey("/cadence", "test-ns", "/cadence/test-ns/shards/shard-1/statistics/extra") + assert.ErrorContains(t, err, "unexpected shard key format: /cadence/test-ns/shards/shard-1/statistics/extra") } From 733bbcbee5ec587672e3e10433fc5545f2ab1108 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Sun, 26 Oct 2025 01:14:21 +0200 Subject: [PATCH 15/62] fix(shard distributor): no longer check for key type ShardStatisticsKey in BuildShardKey, as we don't use it Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go index 906174d0e4d..9ba60b8cbb1 100644 --- a/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go +++ b/service/sharddistributor/store/etcd/etcdkeys/etcdkeys.go @@ -64,7 +64,7 @@ func BuildShardPrefix(prefix string, namespace string) string { } func BuildShardKey(prefix string, namespace, shardID, keyType string) (string, error) { - if keyType != ShardAssignedKey && keyType != ShardStatisticsKey { + if keyType != ShardStatisticsKey { return "", fmt.Errorf("invalid shard key type: %s", keyType) } return fmt.Sprintf("%s%s/%s", BuildShardPrefix(prefix, namespace), shardID, keyType), nil From 6816b8e481054fe8bcf6afde34dbd34b846b7879 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 08:31:52 +0100 Subject: [PATCH 16/62] refactor(shard distributor): found a place where I forgot to rename to "statistics" Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 4867291a098..50603eef5d7 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -36,12 +36,12 @@ type executorStoreImpl struct { timeSource clock.TimeSource } -// shardMetricsUpdate tracks the etcd key and statistics used to update a shard -// after the main transaction in AssignShards for exec state. -type shardMetricsUpdate struct { +// shardStatisticsUpdate holds the staged statistics for a shard so we can write them +// to etcd after the main AssignShards transaction commits. +type shardStatisticsUpdate struct { key string shardID string - metrics store.ShardStatistics + stats store.ShardStatistics desiredLastMove int64 // intended LastMoveTime for this update } @@ -261,7 +261,7 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st assignedStates[executorID] = assigned } - // Fetch shard-level metrics stored under shard namespace keys. + // Fetch shard-level statistics stored under shard namespace keys. shardPrefix := etcdkeys.BuildShardPrefix(s.prefix, namespace) shardResp, err := s.client.Get(ctx, shardPrefix, clientv3.WithPrefix()) if err != nil { @@ -409,8 +409,8 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } -func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { - var updates []shardMetricsUpdate +func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardStatisticsUpdate, error) { + var updates []shardStatisticsUpdate for executorID, state := range newAssignments { for shardID := range state.AssignedShards { @@ -447,10 +447,10 @@ func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, n stats.LastUpdateTime = now } - updates = append(updates, shardMetricsUpdate{ + updates = append(updates, shardStatisticsUpdate{ key: shardStatisticsKey, shardID: shardID, - metrics: stats, + stats: stats, desiredLastMove: now, }) } @@ -461,11 +461,11 @@ func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, n // applyShardStatisticsUpdates updates shard statistics. // Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { +func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardStatisticsUpdate) { for _, update := range updates { - update.metrics.LastMoveTime = update.desiredLastMove + update.stats.LastMoveTime = update.desiredLastMove - payload, err := json.Marshal(update.metrics) + payload, err := json.Marshal(update.stats) if err != nil { s.logger.Warn( "failed to marshal shard statistics after assignment", From f97e0cf6d7c975fc3935424a3acab180c51c5b8d Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:20:49 +0100 Subject: [PATCH 17/62] fix(shard distributor): move non-exported helpers to end of file to follow conventions Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 50603eef5d7..1630b8b3efc 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -409,84 +409,6 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } -func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardStatisticsUpdate, error) { - var updates []shardStatisticsUpdate - - for executorID, state := range newAssignments { - for shardID := range state.AssignedShards { - now := s.timeSource.Now().Unix() - - oldOwner, err := s.shardCache.GetShardOwner(ctx, namespace, shardID) - if err != nil && !errors.Is(err, store.ErrShardNotFound) { - return nil, fmt.Errorf("lookup cached shard owner: %w", err) - } - - // we should just skip if the owner hasn't changed - if err == nil && oldOwner == executorID { - continue - } - - shardStatisticsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) - if err != nil { - return nil, fmt.Errorf("build shard statistics key: %w", err) - } - - statsResp, err := s.client.Get(ctx, shardStatisticsKey) - if err != nil { - return nil, fmt.Errorf("get shard statistics: %w", err) - } - - stats := store.ShardStatistics{} - - if len(statsResp.Kvs) > 0 { - if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { - return nil, fmt.Errorf("unmarshal shard statistics: %w", err) - } - } else { - stats.SmoothedLoad = 0 - stats.LastUpdateTime = now - } - - updates = append(updates, shardStatisticsUpdate{ - key: shardStatisticsKey, - shardID: shardID, - stats: stats, - desiredLastMove: now, - }) - } - } - - return updates, nil -} - -// applyShardStatisticsUpdates updates shard statistics. -// Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardStatisticsUpdate) { - for _, update := range updates { - update.stats.LastMoveTime = update.desiredLastMove - - payload, err := json.Marshal(update.stats) - if err != nil { - s.logger.Warn( - "failed to marshal shard statistics after assignment", - tag.ShardNamespace(namespace), - tag.ShardKey(update.shardID), - tag.Error(err), - ) - continue - } - - if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { - s.logger.Warn( - "failed to update shard statistics", - tag.ShardNamespace(namespace), - tag.ShardKey(update.shardID), - tag.Error(err), - ) - } - } -} - func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, executorID string) error { assignedState, err := etcdkeys.BuildExecutorKey(s.prefix, namespace, executorID, etcdkeys.ExecutorAssignedStateKey) if err != nil { @@ -661,3 +583,81 @@ func (s *executorStoreImpl) DeleteExecutors(ctx context.Context, namespace strin func (s *executorStoreImpl) GetShardOwner(ctx context.Context, namespace, shardID string) (string, error) { return s.shardCache.GetShardOwner(ctx, namespace, shardID) } + +func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardStatisticsUpdate, error) { + var updates []shardStatisticsUpdate + + for executorID, state := range newAssignments { + for shardID := range state.AssignedShards { + now := s.timeSource.Now().Unix() + + oldOwner, err := s.shardCache.GetShardOwner(ctx, namespace, shardID) + if err != nil && !errors.Is(err, store.ErrShardNotFound) { + return nil, fmt.Errorf("lookup cached shard owner: %w", err) + } + + // we should just skip if the owner hasn't changed + if err == nil && oldOwner == executorID { + continue + } + + shardStatisticsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) + if err != nil { + return nil, fmt.Errorf("build shard statistics key: %w", err) + } + + statsResp, err := s.client.Get(ctx, shardStatisticsKey) + if err != nil { + return nil, fmt.Errorf("get shard statistics: %w", err) + } + + stats := store.ShardStatistics{} + + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + return nil, fmt.Errorf("unmarshal shard statistics: %w", err) + } + } else { + stats.SmoothedLoad = 0 + stats.LastUpdateTime = now + } + + updates = append(updates, shardStatisticsUpdate{ + key: shardStatisticsKey, + shardID: shardID, + stats: stats, + desiredLastMove: now, + }) + } + } + + return updates, nil +} + +// applyShardStatisticsUpdates updates shard statistics. +// Is intentionally made tolerant of failures since the data is telemetry only. +func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardStatisticsUpdate) { + for _, update := range updates { + update.stats.LastMoveTime = update.desiredLastMove + + payload, err := json.Marshal(update.stats) + if err != nil { + s.logger.Warn( + "failed to marshal shard statistics after assignment", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) + continue + } + + if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { + s.logger.Warn( + "failed to update shard statistics", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) + } + } +} From 92ba56cec6179a35eaee68ded2cb4716f86c0e42 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:26:41 +0100 Subject: [PATCH 18/62] feat: function to update shard statistics from heartbeat (currently no ewma) Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 1630b8b3efc..5ea8e20c4c7 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -8,6 +8,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "strconv" "time" @@ -149,9 +150,94 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec if err != nil { return fmt.Errorf("record heartbeat: %w", err) } + + s.updateShardStatisticsFromHeartbeat(ctx, namespace, executorID, request.ReportedShards) + return nil } +func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { + if len(reported) == 0 { + return + } + + now := s.timeSource.Now().Unix() + + for shardID, report := range reported { + if report == nil { + continue + } + + load := report.ShardLoad + if math.IsNaN(load) || math.IsInf(load, 0) { + continue + } + + shardStatsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) + if err != nil { + s.logger.Warn( + "failed to build shard statistics key from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + statsResp, err := s.client.Get(ctx, shardStatsKey) + if err != nil { + s.logger.Warn( + "failed to read shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + var stats store.ShardStatistics + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + s.logger.Warn( + "failed to unmarshal shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + } + + stats.SmoothedLoad = load + stats.LastUpdateTime = now + + payload, err := json.Marshal(stats) + if err != nil { + s.logger.Warn( + "failed to marshal shard statistics after heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil { + s.logger.Warn( + "failed to persist shard statistics from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + } + } +} + // GetHeartbeat retrieves the last known heartbeat state for a single executor. func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, executorID string) (*store.HeartbeatState, *store.AssignedState, error) { // The prefix for all keys related to a single executor. From 154e9be2c5be5d48e9f20787413994f84ba72b14 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:29:08 +0100 Subject: [PATCH 19/62] test(shard distributor): add tests to verify statistics are updated at heartbeat Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../etcd/executorstore/etcdstore_test.go | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index e42eb160697..440ceb76b35 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -70,6 +70,93 @@ func TestRecordHeartbeat(t *testing.T) { assert.Equal(t, types.ShardStatusREADY, reportedShards["shard-TestRecordHeartbeat"].Status) } +func TestRecordHeartbeatUpdatesShardStatistics(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-shard-stats" + shardID := "shard-with-load" + + initialStats := store.ShardStatistics{ + SmoothedLoad: 1.23, + LastUpdateTime: 10, + LastMoveTime: 123, + } + + shardStatsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardStatisticsKey) + require.NoError(t, err) + payload, err := json.Marshal(initialStats) + require.NoError(t, err) + _, err = tc.Client.Put(ctx, shardStatsKey, string(payload)) + require.NoError(t, err) + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: { + Status: types.ShardStatusREADY, + ShardLoad: 45.6, + }, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, shardID) + updated := nsState.ShardStats[shardID] + + assert.InDelta(t, 45.6, updated.SmoothedLoad, 1e-9) + assert.GreaterOrEqual(t, updated.LastUpdateTime, nowTS) + assert.Equal(t, initialStats.LastMoveTime, updated.LastMoveTime) +} + +func TestRecordHeartbeatSkipsShardStatisticsWithNilReport(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-missing-load" + validShardID := "shard-with-valid-load" + skippedShardID := "shard-missing-load" + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + validShardID: { + Status: types.ShardStatusREADY, + ShardLoad: 3.21, + }, + skippedShardID: nil, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, validShardID) + validStats := nsState.ShardStats[validShardID] + assert.InDelta(t, 3.21, validStats.SmoothedLoad, 1e-9) + assert.Greater(t, validStats.LastUpdateTime, int64(0)) + + assert.NotContains(t, nsState.ShardStats, skippedShardID) +} + func TestGetHeartbeat(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) From d17b38cdfa3b6920d3b20c65cf0ccf950a051b77 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:27:47 +0100 Subject: [PATCH 20/62] feat(shard distributor): calculate smoothed load (ewma) using the ShardStatistics Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 5ea8e20c4c7..d87fe700ae8 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -211,7 +211,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte } } - stats.SmoothedLoad = load + // Update smoothed load via EWMA. + stats.SmoothedLoad = ewmaSmoothedLoad(stats.SmoothedLoad, load, stats.LastUpdateTime, now) stats.LastUpdateTime = now payload, err := json.Marshal(stats) @@ -747,3 +748,20 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } } } + +// ewmaSmoothedLoad computes an EWMA for shard load. +func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { + const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower + if lastUpdate <= 0 { + return current + } + dt := now - lastUpdate + if dt < 0 { + dt = 0 + } + if tauSeconds <= 0 { + return current + } + alpha := 1 - math.Exp(-float64(dt)/tauSeconds) + return (1-alpha)*prev + alpha*current +} From dda32c9a0e193f5c352c0822853464275418bcde Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:52:59 +0100 Subject: [PATCH 21/62] fix(shard distributor): log invalid shard load Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../sharddistributor/store/etcd/executorstore/etcdstore.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index d87fe700ae8..fe657e05f84 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -170,6 +170,12 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte load := report.ShardLoad if math.IsNaN(load) || math.IsInf(load, 0) { + s.logger.Warn( + "invalid shard load reported; skipping EWMA update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + ) continue } From 799e90e0fc947ddfabacdfdcaf9db1a889bf8a58 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 20:10:55 +0100 Subject: [PATCH 22/62] feat(shard distributor): add tags for aggregated load and assigned count, for logging Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- common/log/tag/tags.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/common/log/tag/tags.go b/common/log/tag/tags.go index b2d2ec2a431..c52c11312c2 100644 --- a/common/log/tag/tags.go +++ b/common/log/tag/tags.go @@ -1185,6 +1185,14 @@ func ShardKey(shardKey string) Tag { return newStringTag("shard-key", shardKey) } +func AggregateLoad(load float64) Tag { + return newFloat64Tag("aggregated-load", load) +} + +func AssignedCount(count int) Tag { + return newInt("assigned-count", count) +} + func ElectionDelay(t time.Duration) Tag { return newDurationTag("election-delay", t) } From 5ff359bbc67b1e6024136d09b3f04b55f91af18d Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 20:14:02 +0100 Subject: [PATCH 23/62] feat(shard distributor): initial placement based on initial placement for ephemeral shards Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/handler/handler.go | 81 ++++++++++++++++++--- 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/service/sharddistributor/handler/handler.go b/service/sharddistributor/handler/handler.go index 0a3bc0abfc8..1f220d53767 100644 --- a/service/sharddistributor/handler/handler.go +++ b/service/sharddistributor/handler/handler.go @@ -31,6 +31,7 @@ import ( "sync" "github.com/uber/cadence/common/log" + "github.com/uber/cadence/common/log/tag" "github.com/uber/cadence/common/types" "github.com/uber/cadence/service/sharddistributor/config" "github.com/uber/cadence/service/sharddistributor/store" @@ -116,30 +117,88 @@ func (h *handlerImpl) GetShardOwner(ctx context.Context, request *types.GetShard func (h *handlerImpl) assignEphemeralShard(ctx context.Context, namespace string, shardID string) (*types.GetShardOwnerResponse, error) { - // Get the current state of the namespace and find the executor with the least assigned shards + // Get the current state of the namespace and evaluate executor load to choose a placement target. state, err := h.storage.GetState(ctx, namespace) if err != nil { + h.logger.Error("failed to read namespace state for ephemeral assignment", tag.ShardNamespace(namespace), tag.ShardKey(shardID), tag.Error(err)) return nil, fmt.Errorf("get state: %w", err) } - var executor string - minAssignedShards := math.MaxInt - - for assignedExecutor, assignment := range state.ShardAssignments { - if len(assignment.AssignedShards) < minAssignedShards { - minAssignedShards = len(assignment.AssignedShards) - executor = assignedExecutor - } + executorID, aggregatedLoad, assignedCount, err := pickLeastLoadedExecutor(state) + if err != nil { + h.logger.Error( + "no eligible executor found for ephemeral assignment", + tag.ShardNamespace(namespace), + tag.ShardKey(shardID), + tag.Error(err), + ) + return nil, err } + h.logger.Info( + "selected executor for ephemeral shard assignment", + tag.AggregateLoad(aggregatedLoad), + tag.AssignedCount(assignedCount), + tag.ShardNamespace(namespace), + tag.ShardKey(shardID), + tag.ShardExecutor(executorID), + ) + // Assign the shard to the executor with the least assigned shards - err = h.storage.AssignShard(ctx, namespace, shardID, executor) + err = h.storage.AssignShard(ctx, namespace, shardID, executorID) if err != nil { + h.logger.Error( + "failed to assign ephemeral shard", + tag.ShardNamespace(namespace), + tag.ShardKey(shardID), + tag.ShardExecutor(executorID), + tag.Error(err), + ) return nil, fmt.Errorf("assign ephemeral shard: %w", err) } return &types.GetShardOwnerResponse{ - Owner: executor, + Owner: executorID, Namespace: namespace, }, nil } + +// pickLeastLoadedExecutor returns the executor with the minimal aggregated smoothed load. +// Ties are broken by fewer assigned shards. +func pickLeastLoadedExecutor(state *store.NamespaceState) (executorID string, aggregatedLoad float64, assignedCount int, err error) { + if state == nil || len(state.ShardAssignments) == 0 { + return "", 0, 0, fmt.Errorf("namespace state is nil or has no executors") + } + + var chosenID string + var chosenAggregatedLoad float64 + var chosenAssignedCount int + minAggregatedLoad := math.MaxFloat64 + minAssignedShards := math.MaxInt + + for candidate, assignment := range state.ShardAssignments { + aggregated := 0.0 + for shard := range assignment.AssignedShards { + if stats, ok := state.ShardStats[shard]; ok { + if !math.IsNaN(stats.SmoothedLoad) && !math.IsInf(stats.SmoothedLoad, 0) { + aggregated += stats.SmoothedLoad + } + } + } + + count := len(assignment.AssignedShards) + if aggregated < minAggregatedLoad || (aggregated == minAggregatedLoad && count < minAssignedShards) { + minAggregatedLoad = aggregated + minAssignedShards = count + chosenID = candidate + chosenAggregatedLoad = aggregated + chosenAssignedCount = count + } + } + + if chosenID == "" { + return "", 0, 0, fmt.Errorf("no executors in namespace state") + } + + return chosenID, chosenAggregatedLoad, chosenAssignedCount, nil +} From d73026e1465b9aee8925b21675e2b8a468e62d7b Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 22:21:18 +0100 Subject: [PATCH 24/62] test(shard distributor): tests that verify we pick the least loaded executor properly Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/handler/handler.go | 1 - .../sharddistributor/handler/handler_test.go | 93 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/handler/handler.go b/service/sharddistributor/handler/handler.go index 1f220d53767..56fdf3072e6 100644 --- a/service/sharddistributor/handler/handler.go +++ b/service/sharddistributor/handler/handler.go @@ -120,7 +120,6 @@ func (h *handlerImpl) assignEphemeralShard(ctx context.Context, namespace string // Get the current state of the namespace and evaluate executor load to choose a placement target. state, err := h.storage.GetState(ctx, namespace) if err != nil { - h.logger.Error("failed to read namespace state for ephemeral assignment", tag.ShardNamespace(namespace), tag.ShardKey(shardID), tag.Error(err)) return nil, fmt.Errorf("get state: %w", err) } diff --git a/service/sharddistributor/handler/handler_test.go b/service/sharddistributor/handler/handler_test.go index 965c5083516..5b90232dd91 100644 --- a/service/sharddistributor/handler/handler_test.go +++ b/service/sharddistributor/handler/handler_test.go @@ -210,3 +210,96 @@ func TestGetShardOwner(t *testing.T) { }) } } + +func TestPickLeastLoadedExecutor(t *testing.T) { + tests := []struct { + name string + state *store.NamespaceState + expectedOwner string + expectedLoad float64 + expectedCount int + expectedError bool + }{ + { + name: "SelectsLeastLoaded", + state: &store.NamespaceState{ + ShardAssignments: map[string]store.AssignedState{ + "exec1": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {}, + "shard2": {}, + "shard3": {}, + "shard4": {}, + }, + }, + "exec2": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard4": {}, + "shard5": {}, + }, + }, + }, + ShardStats: map[string]store.ShardStatistics{ + "shard1": {SmoothedLoad: 1.0}, + "shard2": {SmoothedLoad: 2.0}, + "shard3": {SmoothedLoad: 0.5}, + "shard4": {SmoothedLoad: 0.25}, + "shard5": {SmoothedLoad: 1.0}, + }, + }, + expectedOwner: "exec2", + expectedLoad: 1.25, + expectedCount: 2, + expectedError: false, + }, + { + name: "SelectsLeastLoaded_Tie", + state: &store.NamespaceState{ + ShardAssignments: map[string]store.AssignedState{ + "exec1": {AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {}, + }}, + "exec2": {AssignedShards: map[string]*types.ShardAssignment{ + "shard2": {}, + }}, + }, + ShardStats: map[string]store.ShardStatistics{ + "shard1": {SmoothedLoad: 1.0}, + "shard2": {SmoothedLoad: 1.0}, + }, + }, + expectedOwner: "exec1", + expectedLoad: 1.0, + expectedCount: 1, + expectedError: false, + }, + { + name: "SelectsLeastLoaded_NoExecutors", + state: &store.NamespaceState{}, + expectedError: true, + }, + { + name: "SelectsLeastLoaded_NoShards", + state: &store.NamespaceState{ + ShardAssignments: map[string]store.AssignedState{}, + }, + expectedError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + owner, load, count, err := pickLeastLoadedExecutor(tt.state) + if tt.expectedError { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tt.expectedOwner, owner) + require.Equal(t, tt.expectedLoad, load) + require.Equal(t, tt.expectedCount, count) + + } + }) + } + +} From 6ed2554ed4390948430c52f8a809ac0c35238531 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Tue, 28 Oct 2025 09:40:44 +0100 Subject: [PATCH 25/62] test(shard distributor): add test for GetShardOwner ShardNotFound case with new load based selection Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../sharddistributor/handler/handler_test.go | 57 ++++++++++++------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/service/sharddistributor/handler/handler_test.go b/service/sharddistributor/handler/handler_test.go index 5b90232dd91..d92d99dba47 100644 --- a/service/sharddistributor/handler/handler_test.go +++ b/service/sharddistributor/handler/handler_test.go @@ -180,6 +180,42 @@ func TestGetShardOwner(t *testing.T) { expectedError: true, expectedErrMsg: "assign shard failure", }, + { + name: "ShardNotFound_Ephemeral_LoadBased", + request: &types.GetShardOwnerRequest{ + Namespace: _testNamespaceEphemeral, + ShardKey: "new-shard", + }, + setupMocks: func(mockStore *store.MockStore) { + mockStore.EXPECT().GetShardOwner(gomock.Any(), _testNamespaceEphemeral, "new-shard").Return("", store.ErrShardNotFound) + mockStore.EXPECT().GetState(gomock.Any(), _testNamespaceEphemeral).Return(&store.NamespaceState{ + ShardAssignments: map[string]store.AssignedState{ + "owner1": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {Status: types.AssignmentStatusREADY}, + "shard2": {Status: types.AssignmentStatusREADY}, + }, + }, + "owner2": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard3": {Status: types.AssignmentStatusREADY}, + }, + }, + }, + ShardStats: map[string]store.ShardStatistics{ + "shard1": {SmoothedLoad: 2.5}, + "shard2": {SmoothedLoad: 1.0}, + "shard3": {SmoothedLoad: 0.5}, + }, + }, nil) + // owner1 total load: 2.5 + 1.0 = 3.5 + // owner2 total load: 0.5 + // Should pick owner2 (least loaded) + mockStore.EXPECT().AssignShard(gomock.Any(), _testNamespaceEphemeral, "new-shard", "owner2").Return(nil) + }, + expectedOwner: "owner2", + expectedError: false, + }, } for _, tt := range tests { @@ -252,27 +288,6 @@ func TestPickLeastLoadedExecutor(t *testing.T) { expectedCount: 2, expectedError: false, }, - { - name: "SelectsLeastLoaded_Tie", - state: &store.NamespaceState{ - ShardAssignments: map[string]store.AssignedState{ - "exec1": {AssignedShards: map[string]*types.ShardAssignment{ - "shard1": {}, - }}, - "exec2": {AssignedShards: map[string]*types.ShardAssignment{ - "shard2": {}, - }}, - }, - ShardStats: map[string]store.ShardStatistics{ - "shard1": {SmoothedLoad: 1.0}, - "shard2": {SmoothedLoad: 1.0}, - }, - }, - expectedOwner: "exec1", - expectedLoad: 1.0, - expectedCount: 1, - expectedError: false, - }, { name: "SelectsLeastLoaded_NoExecutors", state: &store.NamespaceState{}, From 513e88c4dd38c57e9eac195f3cf7fcafe81d07c1 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:46:42 +0100 Subject: [PATCH 26/62] feat(shard distributor): clean up the shard statistics Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- common/metrics/defs.go | 1 + .../leader/process/processor.go | 60 +++++++++++++++++++ .../store/etcd/executorstore/etcdstore.go | 35 +++++++++++ service/sharddistributor/store/store.go | 1 + service/sharddistributor/store/store_mock.go | 14 +++++ .../store/wrappers/metered/store_generated.go | 10 ++++ 6 files changed, 121 insertions(+) diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 11da0c184b8..5beb558e8fa 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -1469,6 +1469,7 @@ const ( ShardDistributorStoreAssignShardScope ShardDistributorStoreAssignShardsScope ShardDistributorStoreDeleteExecutorsScope + ShardDistributorStoreDeleteShardStatsScope ShardDistributorStoreGetHeartbeatScope ShardDistributorStoreGetStateScope ShardDistributorStoreRecordHeartbeatScope diff --git a/service/sharddistributor/leader/process/processor.go b/service/sharddistributor/leader/process/processor.go index 67db4222695..b4f128e60d6 100644 --- a/service/sharddistributor/leader/process/processor.go +++ b/service/sharddistributor/leader/process/processor.go @@ -221,6 +221,7 @@ func (p *namespaceProcessor) runCleanupLoop(ctx context.Context) { case <-ticker.Chan(): p.logger.Info("Periodic heartbeat cleanup triggered.") p.cleanupStaleExecutors(ctx) + p.cleanupStaleShardStats(ctx) } } } @@ -254,6 +255,65 @@ func (p *namespaceProcessor) cleanupStaleExecutors(ctx context.Context) { } } +func (p *namespaceProcessor) cleanupStaleShardStats(ctx context.Context) { + namespaceState, err := p.shardStore.GetState(ctx, p.namespaceCfg.Name) + if err != nil { + p.logger.Error("Failed to get state for shard stats cleanup", tag.Error(err)) + return + } + + activeShards := make(map[string]struct{}) + now := p.timeSource.Now().Unix() + shardStatsTTL := int64(p.cfg.HeartbeatTTL.Seconds()) + + // 1. build set of active executors + + // add all assigned shards from executors that are ACTIVE and not stale + for executorID, assignedState := range namespaceState.ShardAssignments { + executor, exists := namespaceState.Executors[executorID] + if !exists { + continue + } + + isActive := executor.Status == types.ExecutorStatusACTIVE + isNotStale := (now - executor.LastHeartbeat) <= shardStatsTTL + if isActive && isNotStale { + for shardID := range assignedState.AssignedShards { + activeShards[shardID] = struct{}{} + } + } + } + + // add all shards in ReportedShards where the status is not DONE + for _, heartbeatState := range namespaceState.Executors { + for shardID, shardStatusReport := range heartbeatState.ReportedShards { + if shardStatusReport.Status != types.ShardStatusDONE { + activeShards[shardID] = struct{}{} + } + } + } + + // 2. build set of stale shard stats + + // append all shard stats that are not in the active shards set + var staleShardStats []string + for shardID := range namespaceState.ShardStats { + if _, ok := activeShards[shardID]; !ok { + staleShardStats = append(staleShardStats, shardID) + } + } + + if len(staleShardStats) == 0 { + return + } + + p.logger.Info("Removing stale shard stats") + // Use the leader guard for the delete operation. + if err := p.shardStore.DeleteShardStats(ctx, p.namespaceCfg.Name, staleShardStats, p.election.Guard()); err != nil { + p.logger.Error("Failed to delete stale shard stats", tag.Error(err)) + } +} + // rebalanceShards is the core logic for distributing shards among active executors. func (p *namespaceProcessor) rebalanceShards(ctx context.Context) (err error) { metricsLoopScope := p.metricsClient.Scope(metrics.ShardDistributorAssignLoopScope) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 1630b8b3efc..46ea57b6eee 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -580,6 +580,41 @@ func (s *executorStoreImpl) DeleteExecutors(ctx context.Context, namespace strin return nil } +func (s *executorStoreImpl) DeleteShardStats(ctx context.Context, namespace string, shardIDs []string, guard store.GuardFunc) error { + if len(shardIDs) == 0 { + return nil + } + var ops []clientv3.Op + for _, shardID := range shardIDs { + shardStatsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) + if err != nil { + return fmt.Errorf("build shard statistics key: %w", err) + } + ops = append(ops, clientv3.OpDelete(shardStatsKey)) + } + + nativeTxn := s.client.Txn(ctx) + guardedTxn, err := guard(nativeTxn) + + if err != nil { + return fmt.Errorf("apply transaction guard: %w", err) + } + etcdGuardedTxn, ok := guardedTxn.(clientv3.Txn) + if !ok { + return fmt.Errorf("guard function returned invalid transaction type") + } + + etcdGuardedTxn = etcdGuardedTxn.Then(ops...) + resp, err := etcdGuardedTxn.Commit() + if err != nil { + return fmt.Errorf("commit shard statistics deletion: %w", err) + } + if !resp.Succeeded { + return fmt.Errorf("transaction failed, leadership may have changed") + } + return nil +} + func (s *executorStoreImpl) GetShardOwner(ctx context.Context, namespace, shardID string) (string, error) { return s.shardCache.GetShardOwner(ctx, namespace, shardID) } diff --git a/service/sharddistributor/store/store.go b/service/sharddistributor/store/store.go index 31adffa58a1..11c01914d0d 100644 --- a/service/sharddistributor/store/store.go +++ b/service/sharddistributor/store/store.go @@ -60,6 +60,7 @@ type Store interface { AssignShards(ctx context.Context, namespace string, request AssignShardsRequest, guard GuardFunc) error Subscribe(ctx context.Context, namespace string) (<-chan int64, error) DeleteExecutors(ctx context.Context, namespace string, executorIDs []string, guard GuardFunc) error + DeleteShardStats(ctx context.Context, namespace string, shardIDs []string, guard GuardFunc) error GetShardOwner(ctx context.Context, namespace, shardID string) (string, error) AssignShard(ctx context.Context, namespace, shardID, executorID string) error diff --git a/service/sharddistributor/store/store_mock.go b/service/sharddistributor/store/store_mock.go index 6dac175c750..1bae8344306 100644 --- a/service/sharddistributor/store/store_mock.go +++ b/service/sharddistributor/store/store_mock.go @@ -106,6 +106,20 @@ func (mr *MockStoreMockRecorder) DeleteExecutors(ctx, namespace, executorIDs, gu return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteExecutors", reflect.TypeOf((*MockStore)(nil).DeleteExecutors), ctx, namespace, executorIDs, guard) } +// DeleteShardStats mocks base method. +func (m *MockStore) DeleteShardStats(ctx context.Context, namespace string, shardIDs []string, guard GuardFunc) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteShardStats", ctx, namespace, shardIDs, guard) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteShardStats indicates an expected call of DeleteShardStats. +func (mr *MockStoreMockRecorder) DeleteShardStats(ctx, namespace, shardIDs, guard any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteShardStats", reflect.TypeOf((*MockStore)(nil).DeleteShardStats), ctx, namespace, shardIDs, guard) +} + // GetHeartbeat mocks base method. func (m *MockStore) GetHeartbeat(ctx context.Context, namespace, executorID string) (*HeartbeatState, *AssignedState, error) { m.ctrl.T.Helper() diff --git a/service/sharddistributor/store/wrappers/metered/store_generated.go b/service/sharddistributor/store/wrappers/metered/store_generated.go index 591e3697ae7..cef7ce80154 100644 --- a/service/sharddistributor/store/wrappers/metered/store_generated.go +++ b/service/sharddistributor/store/wrappers/metered/store_generated.go @@ -69,6 +69,16 @@ func (c *meteredStore) DeleteExecutors(ctx context.Context, namespace string, ex return } +func (c *meteredStore) DeleteShardStats(ctx context.Context, namespace string, shardIDs []string, guard store.GuardFunc) (err error) { + op := func() error { + err = c.wrapped.DeleteShardStats(ctx, namespace, shardIDs, guard) + return err + } + + err = c.call(metrics.ShardDistributorStoreDeleteShardStatsScope, op, metrics.NamespaceTag(namespace)) + return +} + func (c *meteredStore) GetHeartbeat(ctx context.Context, namespace string, executorID string) (hp1 *store.HeartbeatState, ap1 *store.AssignedState, err error) { op := func() error { hp1, ap1, err = c.wrapped.GetHeartbeat(ctx, namespace, executorID) From 98335257bab5e143d9b739e4122429c47d5dbad7 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:48:15 +0100 Subject: [PATCH 27/62] test(shard distributor): add test case for when shard stats are deleted Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../leader/process/processor_test.go | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index fa351a067bd..df96c650cdc 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -185,6 +185,55 @@ func TestCleanupStaleExecutors(t *testing.T) { processor.cleanupStaleExecutors(context.Background()) } +func TestCleanupStaleShardStats(t *testing.T) { + t.Run("stale shard stats are deleted", func(t *testing.T) { + mocks := setupProcessorTest(t, config.NamespaceTypeFixed) + defer mocks.ctrl.Finish() + processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) + + now := mocks.timeSource.Now() + + heartbeats := map[string]store.HeartbeatState{ + "exec-active": {LastHeartbeat: now.Unix(), Status: types.ExecutorStatusACTIVE}, + "exec-stale": {LastHeartbeat: now.Add(-2 * time.Second).Unix()}, + } + + assignments := map[string]store.AssignedState{ + "exec-active": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard-1": {Status: types.AssignmentStatusREADY}, + "shard-2": {Status: types.AssignmentStatusREADY}, + }, + }, + "exec-stale": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard-3": {Status: types.AssignmentStatusREADY}, + }, + }, + } + + shardStats := map[string]store.ShardStatistics{ + "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + } + + namespaceState := &store.NamespaceState{ + Executors: heartbeats, + ShardAssignments: assignments, + ShardStats: shardStats, + } + + gomock.InOrder( + mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(namespaceState, nil), + mocks.election.EXPECT().Guard().Return(store.NopGuard()), + mocks.store.EXPECT().DeleteShardStats(gomock.Any(), mocks.cfg.Name, []string{"shard-3"}, gomock.Any()).Return(nil), + ) + processor.cleanupStaleShardStats(context.Background()) + }) + +} + func TestRebalance_StoreErrors(t *testing.T) { mocks := setupProcessorTest(t, config.NamespaceTypeFixed) defer mocks.ctrl.Finish() From 0332fe56dd78cd59d90435705413f7b0235332ec Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 29 Oct 2025 23:18:10 +0100 Subject: [PATCH 28/62] fix(shard distributor): add mapping (new metric) Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- common/metrics/defs.go | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 5beb558e8fa..dd66e302f4b 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -2143,18 +2143,19 @@ var ScopeDefs = map[ServiceIdx]map[ScopeIdx]scopeDefinition{ DiagnosticsWorkflowScope: {operation: "DiagnosticsWorkflow"}, }, ShardDistributor: { - ShardDistributorGetShardOwnerScope: {operation: "GetShardOwner"}, - ShardDistributorHeartbeatScope: {operation: "ExecutorHeartbeat"}, - ShardDistributorAssignLoopScope: {operation: "ShardAssignLoop"}, - ShardDistributorExecutorScope: {operation: "Executor"}, - ShardDistributorStoreGetShardOwnerScope: {operation: "StoreGetShardOwner"}, - ShardDistributorStoreAssignShardScope: {operation: "StoreAssignShard"}, - ShardDistributorStoreAssignShardsScope: {operation: "StoreAssignShards"}, - ShardDistributorStoreDeleteExecutorsScope: {operation: "StoreDeleteExecutors"}, - ShardDistributorStoreGetHeartbeatScope: {operation: "StoreGetHeartbeat"}, - ShardDistributorStoreGetStateScope: {operation: "StoreGetState"}, - ShardDistributorStoreRecordHeartbeatScope: {operation: "StoreRecordHeartbeat"}, - ShardDistributorStoreSubscribeScope: {operation: "StoreSubscribe"}, + ShardDistributorGetShardOwnerScope: {operation: "GetShardOwner"}, + ShardDistributorHeartbeatScope: {operation: "ExecutorHeartbeat"}, + ShardDistributorAssignLoopScope: {operation: "ShardAssignLoop"}, + ShardDistributorExecutorScope: {operation: "Executor"}, + ShardDistributorStoreGetShardOwnerScope: {operation: "StoreGetShardOwner"}, + ShardDistributorStoreAssignShardScope: {operation: "StoreAssignShard"}, + ShardDistributorStoreAssignShardsScope: {operation: "StoreAssignShards"}, + ShardDistributorStoreDeleteExecutorsScope: {operation: "StoreDeleteExecutors"}, + ShardDistributorStoreDeleteShardStatsScope: {operation: "StoreDeleteShardStats"}, + ShardDistributorStoreGetHeartbeatScope: {operation: "StoreGetHeartbeat"}, + ShardDistributorStoreGetStateScope: {operation: "StoreGetState"}, + ShardDistributorStoreRecordHeartbeatScope: {operation: "StoreRecordHeartbeat"}, + ShardDistributorStoreSubscribeScope: {operation: "StoreSubscribe"}, }, } From d5a13d97cc03f18a5dba20dd4bfb17e5fd71ff47 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 30 Oct 2025 09:33:01 +0100 Subject: [PATCH 29/62] feat(shard distributor): retain shard stats while shards are within heartbeat TTL Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../leader/process/processor.go | 15 ++++++++--- .../leader/process/processor_test.go | 26 ++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/service/sharddistributor/leader/process/processor.go b/service/sharddistributor/leader/process/processor.go index b4f128e60d6..da34c9ac385 100644 --- a/service/sharddistributor/leader/process/processor.go +++ b/service/sharddistributor/leader/process/processor.go @@ -297,10 +297,19 @@ func (p *namespaceProcessor) cleanupStaleShardStats(ctx context.Context) { // append all shard stats that are not in the active shards set var staleShardStats []string - for shardID := range namespaceState.ShardStats { - if _, ok := activeShards[shardID]; !ok { - staleShardStats = append(staleShardStats, shardID) + for shardID, stats := range namespaceState.ShardStats { + if _, ok := activeShards[shardID]; ok { + continue + } + recentUpdate := stats.LastUpdateTime > 0 && (now-stats.LastUpdateTime) <= shardStatsTTL + recentMove := stats.LastMoveTime > 0 && (now-stats.LastMoveTime) <= shardStatsTTL + if recentUpdate || recentMove { + // Preserve stats that have been updated recently to allow cooldown/load history to + // survive executor churn. These shards are likely awaiting reassignment, + // so we don't want to delete them. + continue } + staleShardStats = append(staleShardStats, shardID) } if len(staleShardStats) == 0 { diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index df96c650cdc..639dba315c5 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -215,7 +215,7 @@ func TestCleanupStaleShardStats(t *testing.T) { shardStats := map[string]store.ShardStatistics{ "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second).Unix(), LastMoveTime: now.Add(-2 * time.Second).Unix()}, } namespaceState := &store.NamespaceState{ @@ -232,6 +232,30 @@ func TestCleanupStaleShardStats(t *testing.T) { processor.cleanupStaleShardStats(context.Background()) }) + t.Run("recent shard stats are preserved", func(t *testing.T) { + mocks := setupProcessorTest(t, config.NamespaceTypeFixed) + defer mocks.ctrl.Finish() + processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) + + now := mocks.timeSource.Now() + + expiredExecutor := now.Add(-2 * time.Second).Unix() + state := &store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "exec-stale": {LastHeartbeat: expiredExecutor}, + }, + ShardAssignments: map[string]store.AssignedState{}, + ShardStats: map[string]store.ShardStatistics{ + "shard-1": {SmoothedLoad: 5.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + }, + } + + mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(state, nil) + processor.cleanupStaleShardStats(context.Background()) + + // No delete expected since stats are recent. + }) + } func TestRebalance_StoreErrors(t *testing.T) { From 634bc026759085d092f55276b5a3850be160eb1a Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:26:41 +0100 Subject: [PATCH 30/62] feat: function to update shard statistics from heartbeat (currently no ewma) Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 46ea57b6eee..b1ff4d2efa3 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -8,6 +8,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "strconv" "time" @@ -149,9 +150,94 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec if err != nil { return fmt.Errorf("record heartbeat: %w", err) } + + s.updateShardStatisticsFromHeartbeat(ctx, namespace, executorID, request.ReportedShards) + return nil } +func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { + if len(reported) == 0 { + return + } + + now := s.timeSource.Now().Unix() + + for shardID, report := range reported { + if report == nil { + continue + } + + load := report.ShardLoad + if math.IsNaN(load) || math.IsInf(load, 0) { + continue + } + + shardStatsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) + if err != nil { + s.logger.Warn( + "failed to build shard statistics key from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + statsResp, err := s.client.Get(ctx, shardStatsKey) + if err != nil { + s.logger.Warn( + "failed to read shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + var stats store.ShardStatistics + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + s.logger.Warn( + "failed to unmarshal shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + } + + stats.SmoothedLoad = load + stats.LastUpdateTime = now + + payload, err := json.Marshal(stats) + if err != nil { + s.logger.Warn( + "failed to marshal shard statistics after heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil { + s.logger.Warn( + "failed to persist shard statistics from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + } + } +} + // GetHeartbeat retrieves the last known heartbeat state for a single executor. func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, executorID string) (*store.HeartbeatState, *store.AssignedState, error) { // The prefix for all keys related to a single executor. From 812e854f58ee32709ca189f32a25859b85a24d6e Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:29:08 +0100 Subject: [PATCH 31/62] test(shard distributor): add tests to verify statistics are updated at heartbeat Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../etcd/executorstore/etcdstore_test.go | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index e42eb160697..440ceb76b35 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -70,6 +70,93 @@ func TestRecordHeartbeat(t *testing.T) { assert.Equal(t, types.ShardStatusREADY, reportedShards["shard-TestRecordHeartbeat"].Status) } +func TestRecordHeartbeatUpdatesShardStatistics(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-shard-stats" + shardID := "shard-with-load" + + initialStats := store.ShardStatistics{ + SmoothedLoad: 1.23, + LastUpdateTime: 10, + LastMoveTime: 123, + } + + shardStatsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardStatisticsKey) + require.NoError(t, err) + payload, err := json.Marshal(initialStats) + require.NoError(t, err) + _, err = tc.Client.Put(ctx, shardStatsKey, string(payload)) + require.NoError(t, err) + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: { + Status: types.ShardStatusREADY, + ShardLoad: 45.6, + }, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, shardID) + updated := nsState.ShardStats[shardID] + + assert.InDelta(t, 45.6, updated.SmoothedLoad, 1e-9) + assert.GreaterOrEqual(t, updated.LastUpdateTime, nowTS) + assert.Equal(t, initialStats.LastMoveTime, updated.LastMoveTime) +} + +func TestRecordHeartbeatSkipsShardStatisticsWithNilReport(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-missing-load" + validShardID := "shard-with-valid-load" + skippedShardID := "shard-missing-load" + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + validShardID: { + Status: types.ShardStatusREADY, + ShardLoad: 3.21, + }, + skippedShardID: nil, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, validShardID) + validStats := nsState.ShardStats[validShardID] + assert.InDelta(t, 3.21, validStats.SmoothedLoad, 1e-9) + assert.Greater(t, validStats.LastUpdateTime, int64(0)) + + assert.NotContains(t, nsState.ShardStats, skippedShardID) +} + func TestGetHeartbeat(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) From b9813e735fc9711f3a4c856500c9cb68089b4ef3 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:27:47 +0100 Subject: [PATCH 32/62] feat(shard distributor): calculate smoothed load (ewma) using the ShardStatistics Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index b1ff4d2efa3..83b8d66e999 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -211,7 +211,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte } } - stats.SmoothedLoad = load + // Update smoothed load via EWMA. + stats.SmoothedLoad = ewmaSmoothedLoad(stats.SmoothedLoad, load, stats.LastUpdateTime, now) stats.LastUpdateTime = now payload, err := json.Marshal(stats) @@ -782,3 +783,20 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } } } + +// ewmaSmoothedLoad computes an EWMA for shard load. +func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { + const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower + if lastUpdate <= 0 { + return current + } + dt := now - lastUpdate + if dt < 0 { + dt = 0 + } + if tauSeconds <= 0 { + return current + } + alpha := 1 - math.Exp(-float64(dt)/tauSeconds) + return (1-alpha)*prev + alpha*current +} From dfb7448c8946869b7c51352b75215d7792514639 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:52:59 +0100 Subject: [PATCH 33/62] fix(shard distributor): log invalid shard load Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../sharddistributor/store/etcd/executorstore/etcdstore.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 83b8d66e999..737da59d821 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -170,6 +170,12 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte load := report.ShardLoad if math.IsNaN(load) || math.IsInf(load, 0) { + s.logger.Warn( + "invalid shard load reported; skipping EWMA update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + ) continue } From 36ec08f0c622c33152992e8f7cf9c440e0d8b23c Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Sun, 2 Nov 2025 17:00:02 +0100 Subject: [PATCH 34/62] chore: added logger warning and simplified ewma calculation Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 737da59d821..d4f525affa6 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -165,6 +165,11 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte for shardID, report := range reported { if report == nil { + s.logger.Warn("empty report; skipping EWMA update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + ) continue } @@ -790,19 +795,12 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } } -// ewmaSmoothedLoad computes an EWMA for shard load. func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower - if lastUpdate <= 0 { - return current - } - dt := now - lastUpdate - if dt < 0 { - dt = 0 - } - if tauSeconds <= 0 { + if lastUpdate <= 0 || tauSeconds <= 0 { return current } + dt := max(now-lastUpdate, 0) alpha := 1 - math.Exp(-float64(dt)/tauSeconds) return (1-alpha)*prev + alpha*current } From af733e6bd3a6193a66a6fb967a9bd09fa391ecc5 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:52:56 +0100 Subject: [PATCH 35/62] fix: remove duplicate test introduced in merge Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../leader/process/processor_test.go | 73 ------------------- 1 file changed, 73 deletions(-) diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index 0e7cede9481..1c6e0422caf 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -254,79 +254,6 @@ func TestCleanupStaleShardStats(t *testing.T) { } -func TestCleanupStaleShardStats(t *testing.T) { - t.Run("stale shard stats are deleted", func(t *testing.T) { - mocks := setupProcessorTest(t, config.NamespaceTypeFixed) - defer mocks.ctrl.Finish() - processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) - - now := mocks.timeSource.Now() - - heartbeats := map[string]store.HeartbeatState{ - "exec-active": {LastHeartbeat: now.Unix(), Status: types.ExecutorStatusACTIVE}, - "exec-stale": {LastHeartbeat: now.Add(-2 * time.Second).Unix()}, - } - - assignments := map[string]store.AssignedState{ - "exec-active": { - AssignedShards: map[string]*types.ShardAssignment{ - "shard-1": {Status: types.AssignmentStatusREADY}, - "shard-2": {Status: types.AssignmentStatusREADY}, - }, - }, - "exec-stale": { - AssignedShards: map[string]*types.ShardAssignment{ - "shard-3": {Status: types.AssignmentStatusREADY}, - }, - }, - } - - shardStats := map[string]store.ShardStatistics{ - "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second).Unix(), LastMoveTime: now.Add(-2 * time.Second).Unix()}, - } - - namespaceState := &store.NamespaceState{ - Executors: heartbeats, - ShardAssignments: assignments, - ShardStats: shardStats, - } - - gomock.InOrder( - mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(namespaceState, nil), - mocks.election.EXPECT().Guard().Return(store.NopGuard()), - mocks.store.EXPECT().DeleteShardStats(gomock.Any(), mocks.cfg.Name, []string{"shard-3"}, gomock.Any()).Return(nil), - ) - processor.cleanupStaleShardStats(context.Background()) - }) - - t.Run("recent shard stats are preserved", func(t *testing.T) { - mocks := setupProcessorTest(t, config.NamespaceTypeFixed) - defer mocks.ctrl.Finish() - processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) - - now := mocks.timeSource.Now() - - expiredExecutor := now.Add(-2 * time.Second).Unix() - state := &store.NamespaceState{ - Executors: map[string]store.HeartbeatState{ - "exec-stale": {LastHeartbeat: expiredExecutor}, - }, - ShardAssignments: map[string]store.AssignedState{}, - ShardStats: map[string]store.ShardStatistics{ - "shard-1": {SmoothedLoad: 5.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - }, - } - - mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(state, nil) - processor.cleanupStaleShardStats(context.Background()) - - // No delete expected since stats are recent. - }) - -} - func TestRebalance_StoreErrors(t *testing.T) { mocks := setupProcessorTest(t, config.NamespaceTypeFixed) defer mocks.ctrl.Finish() From a52e86f3d2aa0c8a379274c077e3db2cfa296075 Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Tue, 11 Nov 2025 11:16:15 +0100 Subject: [PATCH 36/62] chore: consistent error checking, and rename function Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 2fb1815660c..a22c29fd466 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -155,12 +155,12 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec return fmt.Errorf("record heartbeat: %w", err) } - s.updateShardStatisticsFromHeartbeat(ctx, namespace, executorID, request.ReportedShards) + s.recordShardStatistics(ctx, namespace, executorID, request.ReportedShards) return nil } -func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { +func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { if len(reported) == 0 { return } @@ -214,7 +214,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte var stats store.ShardStatistics if len(statsResp.Kvs) > 0 { - if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + err := json.Unmarshal(statsResp.Kvs[0].Value, &stats) + if err != nil { s.logger.Warn( "failed to unmarshal shard statistics for heartbeat update", tag.ShardNamespace(namespace), @@ -242,7 +243,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte continue } - if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil { + _, err = s.client.Put(ctx, shardStatsKey, string(payload)) + if err != nil { s.logger.Warn( "failed to persist shard statistics from heartbeat", tag.ShardNamespace(namespace), @@ -817,7 +819,7 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { - const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower + const tauSeconds = 30.0 // smaller = more responsive, larger = smoother if lastUpdate <= 0 || tauSeconds <= 0 { return current } From abfc80e08e3ceea5c925d915685d5fc2a9f5b8c6 Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Tue, 11 Nov 2025 12:07:23 +0100 Subject: [PATCH 37/62] chore: added decompress to unmarshal Signed-off-by: Theis Randeris Mathiassen --- service/sharddistributor/store/etcd/executorstore/etcdstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index a22c29fd466..8d721450a5b 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -214,7 +214,7 @@ func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace var stats store.ShardStatistics if len(statsResp.Kvs) > 0 { - err := json.Unmarshal(statsResp.Kvs[0].Value, &stats) + err := common.DecompressAndUnmarshal(statsResp.Kvs[0].Value, &stats) if err != nil { s.logger.Warn( "failed to unmarshal shard statistics for heartbeat update", From df0feaf2c495b98f08919b08924afa5a1d7a646a Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Sun, 19 Oct 2025 22:10:17 +0200 Subject: [PATCH 38/62] feat(shard distributor): persist shard metrics in etcd store Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- service/sharddistributor/store/etcd/executorstore/etcdstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index d3498345c56..e7130bc49dc 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -284,7 +284,7 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st Executors: heartbeatStates, ShardStats: shardStats, ShardAssignments: assignedStates, - GlobalRevision: resp.Header.Revision, + GlobalRevision: globalRevision, }, nil } From 8546a269fef459be3ed215401673c62f799a85ba Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Mon, 20 Oct 2025 01:37:39 +0200 Subject: [PATCH 39/62] feat(shard distributor): move shard metric updates out of AssignShards txn and retry monotonically Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index e7130bc49dc..ecf4869f9d2 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -46,6 +46,18 @@ type shardStatisticsUpdate struct { desiredLastMove int64 // intended LastMoveTime for this update } +// shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard +// after the main transaction in AssignShards for exec state. +// Retains metrics to safely merge concurrent updates before retrying. +type shardMetricsUpdate struct { + key string + shardID string + metrics store.ShardMetrics + modRevision int64 + desiredLastMove int64 // intended LastMoveTime for this update + defaultLastUpdate int64 +} + // ExecutorStoreParams defines the dependencies for the etcd store, for use with fx. type ExecutorStoreParams struct { fx.In @@ -407,6 +419,75 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } +// applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. +// Decided to run these writes outside the primary transaction +// so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry +// logs failures instead of failing the main assignment. +func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { + for i := range updates { + update := &updates[i] + + for { + // If a newer rebalance already set a later LastMoveTime, there's nothing left for this iteration. + if update.metrics.LastMoveTime >= update.desiredLastMove { + break + } + + update.metrics.LastMoveTime = update.desiredLastMove + + payload, err := json.Marshal(update.metrics) + if err != nil { + // Log and move on. failing metrics formatting should not invalidate the finished assignment. + s.logger.Warn("failed to marshal shard metrics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + txnResp, err := s.client.Txn(ctx). + If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). + Then(clientv3.OpPut(update.key, string(payload))). + Commit() + if err != nil { + // log and abandon this shard rather than propagating an error after assignments commit. + s.logger.Warn("failed to commit shard metrics update after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + if txnResp.Succeeded { + break + } + + if ctx.Err() != nil { + s.logger.Warn("context canceled while updating shard metrics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(ctx.Err())) + return + } + + // Another writer beat us. pull the latest metrics so we can merge their view and retry. + metricsResp, err := s.client.Get(ctx, update.key) + if err != nil { + // Unable to observe the conflicting write, so we skip this shard and keep the assignment result. + s.logger.Warn("failed to refresh shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + + update.modRevision = 0 + if len(metricsResp.Kvs) > 0 { + update.modRevision = metricsResp.Kvs[0].ModRevision + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &update.metrics); err != nil { + // If the value is corrupt we cannot safely merge, so we abandon this shard's metrics update. + s.logger.Warn("failed to unmarshal shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) + break + } + } else { + update.metrics = store.ShardMetrics{ + SmoothedLoad: 0, + LastUpdateTime: update.defaultLastUpdate, + } + update.modRevision = 0 + } + } + } +} + func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, executorID string) error { assignedState, err := etcdkeys.BuildExecutorKey(s.prefix, namespace, executorID, etcdkeys.ExecutorAssignedStateKey) if err != nil { From dde87ef43ee821401ea52abad91613bc7810a49e Mon Sep 17 00:00:00 2001 From: Andreas Holt Date: Mon, 20 Oct 2025 12:17:21 +0200 Subject: [PATCH 40/62] fix(shard distributor): keep NamespaceState revisions tied to assignments Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- service/sharddistributor/store/etcd/executorstore/etcdstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index ecf4869f9d2..79d5f263e58 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -296,7 +296,7 @@ func (s *executorStoreImpl) GetState(ctx context.Context, namespace string) (*st Executors: heartbeatStates, ShardStats: shardStats, ShardAssignments: assignedStates, - GlobalRevision: globalRevision, + GlobalRevision: resp.Header.Revision, }, nil } From 415e80c9f45d9e375cf9f9e28df8f9c154debb29 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:24:36 +0200 Subject: [PATCH 41/62] refactor(shard distributor): use shard cache and clock for preparing shard metrics, move out to staging to separate function Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 55 ++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 79d5f263e58..0da6e7f2d0d 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -48,7 +48,6 @@ type shardStatisticsUpdate struct { // shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard // after the main transaction in AssignShards for exec state. -// Retains metrics to safely merge concurrent updates before retrying. type shardMetricsUpdate struct { key string shardID string @@ -419,6 +418,60 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } +func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { + var updates []shardMetricsUpdate + + for executorID, state := range newAssignments { + for shardID := range state.AssignedShards { + now := s.timeSource.Now().Unix() + + oldOwner, err := s.shardCache.GetShardOwner(ctx, namespace, shardID) + if err != nil && !errors.Is(err, store.ErrShardNotFound) { + return nil, fmt.Errorf("lookup cached shard owner: %w", err) + } + + // we should just skip if the owner hasn't changed + if err == nil && oldOwner == executorID { + continue + } + + shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + if err != nil { + return nil, fmt.Errorf("build shard metrics key: %w", err) + } + + metricsResp, err := s.client.Get(ctx, shardMetricsKey) + if err != nil { + return nil, fmt.Errorf("get shard metrics: %w", err) + } + + metrics := store.ShardMetrics{} + modRevision := int64(0) + + if len(metricsResp.Kvs) > 0 { + modRevision = metricsResp.Kvs[0].ModRevision + if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { + return nil, fmt.Errorf("unmarshal shard metrics: %w", err) + } + } else { + metrics.SmoothedLoad = 0 + metrics.LastUpdateTime = now + } + + updates = append(updates, shardMetricsUpdate{ + key: shardMetricsKey, + shardID: shardID, + metrics: metrics, + modRevision: modRevision, + desiredLastMove: now, + defaultLastUpdate: metrics.LastUpdateTime, + }) + } + } + + return updates, nil +} + // applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. // Decided to run these writes outside the primary transaction // so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry From c67d5c3bcc792c545bc748d10b5b8de0383be172 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Fri, 24 Oct 2025 00:35:46 +0200 Subject: [PATCH 42/62] feat(shard distributor): simplify shard metrics updates Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 105 +++++------------- 1 file changed, 29 insertions(+), 76 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 0da6e7f2d0d..c465102194a 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -49,12 +49,10 @@ type shardStatisticsUpdate struct { // shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard // after the main transaction in AssignShards for exec state. type shardMetricsUpdate struct { - key string - shardID string - metrics store.ShardMetrics - modRevision int64 - desiredLastMove int64 // intended LastMoveTime for this update - defaultLastUpdate int64 + key string + shardID string + metrics store.ShardMetrics + desiredLastMove int64 // intended LastMoveTime for this update } // ExecutorStoreParams defines the dependencies for the etcd store, for use with fx. @@ -446,10 +444,8 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name } metrics := store.ShardMetrics{} - modRevision := int64(0) if len(metricsResp.Kvs) > 0 { - modRevision = metricsResp.Kvs[0].ModRevision if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { return nil, fmt.Errorf("unmarshal shard metrics: %w", err) } @@ -459,12 +455,10 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name } updates = append(updates, shardMetricsUpdate{ - key: shardMetricsKey, - shardID: shardID, - metrics: metrics, - modRevision: modRevision, - desiredLastMove: now, - defaultLastUpdate: metrics.LastUpdateTime, + key: shardMetricsKey, + shardID: shardID, + metrics: metrics, + desiredLastMove: now, }) } } @@ -472,71 +466,30 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name return updates, nil } -// applyShardMetricsUpdates updates shard metrics (like last_move_time) after AssignShards. -// Decided to run these writes outside the primary transaction -// so we are less likely to exceed etcd's max txn-op threshold (128?), and we retry -// logs failures instead of failing the main assignment. +// applyShardMetricsUpdates updates shard metrics. +// Is intentionally made tolerant of failures since the data is telemetry only. func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { - for i := range updates { - update := &updates[i] - - for { - // If a newer rebalance already set a later LastMoveTime, there's nothing left for this iteration. - if update.metrics.LastMoveTime >= update.desiredLastMove { - break - } - - update.metrics.LastMoveTime = update.desiredLastMove - - payload, err := json.Marshal(update.metrics) - if err != nil { - // Log and move on. failing metrics formatting should not invalidate the finished assignment. - s.logger.Warn("failed to marshal shard metrics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - - txnResp, err := s.client.Txn(ctx). - If(clientv3.Compare(clientv3.ModRevision(update.key), "=", update.modRevision)). - Then(clientv3.OpPut(update.key, string(payload))). - Commit() - if err != nil { - // log and abandon this shard rather than propagating an error after assignments commit. - s.logger.Warn("failed to commit shard metrics update after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - - if txnResp.Succeeded { - break - } - - if ctx.Err() != nil { - s.logger.Warn("context canceled while updating shard metrics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(ctx.Err())) - return - } + for _, update := range updates { + update.metrics.LastMoveTime = update.desiredLastMove - // Another writer beat us. pull the latest metrics so we can merge their view and retry. - metricsResp, err := s.client.Get(ctx, update.key) - if err != nil { - // Unable to observe the conflicting write, so we skip this shard and keep the assignment result. - s.logger.Warn("failed to refresh shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } + payload, err := json.Marshal(update.metrics) + if err != nil { + s.logger.Warn( + "failed to marshal shard metrics after assignment", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) + continue + } - update.modRevision = 0 - if len(metricsResp.Kvs) > 0 { - update.modRevision = metricsResp.Kvs[0].ModRevision - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &update.metrics); err != nil { - // If the value is corrupt we cannot safely merge, so we abandon this shard's metrics update. - s.logger.Warn("failed to unmarshal shard metrics after compare conflict", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err)) - break - } - } else { - update.metrics = store.ShardMetrics{ - SmoothedLoad: 0, - LastUpdateTime: update.defaultLastUpdate, - } - update.modRevision = 0 - } + if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { + s.logger.Warn( + "failed to update shard metrics", + tag.ShardNamespace(namespace), + tag.ShardKey(update.shardID), + tag.Error(err), + ) } } } From 9ffcefb2498be1b94949fe3fc14d111ba799a6ce Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:18:38 +0200 Subject: [PATCH 43/62] refactor(shard distributor): ShardMetrics renamed to ShardStatistics. And more idiomatic naming of collection vs singular type Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index c465102194a..ea26fd4b510 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -51,7 +51,7 @@ type shardStatisticsUpdate struct { type shardMetricsUpdate struct { key string shardID string - metrics store.ShardMetrics + metrics store.ShardStatistics desiredLastMove int64 // intended LastMoveTime for this update } @@ -416,7 +416,7 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } -func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { +func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { var updates []shardMetricsUpdate for executorID, state := range newAssignments { @@ -433,31 +433,31 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name continue } - shardMetricsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardMetricsKey) + shardStatisticsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) if err != nil { - return nil, fmt.Errorf("build shard metrics key: %w", err) + return nil, fmt.Errorf("build shard statistics key: %w", err) } - metricsResp, err := s.client.Get(ctx, shardMetricsKey) + statsResp, err := s.client.Get(ctx, shardStatisticsKey) if err != nil { - return nil, fmt.Errorf("get shard metrics: %w", err) + return nil, fmt.Errorf("get shard statistics: %w", err) } - metrics := store.ShardMetrics{} + stats := store.ShardStatistics{} - if len(metricsResp.Kvs) > 0 { - if err := json.Unmarshal(metricsResp.Kvs[0].Value, &metrics); err != nil { - return nil, fmt.Errorf("unmarshal shard metrics: %w", err) + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + return nil, fmt.Errorf("unmarshal shard statistics: %w", err) } } else { - metrics.SmoothedLoad = 0 - metrics.LastUpdateTime = now + stats.SmoothedLoad = 0 + stats.LastUpdateTime = now } updates = append(updates, shardMetricsUpdate{ - key: shardMetricsKey, + key: shardStatisticsKey, shardID: shardID, - metrics: metrics, + metrics: stats, desiredLastMove: now, }) } @@ -466,16 +466,16 @@ func (s *executorStoreImpl) prepareShardMetricsUpdates(ctx context.Context, name return updates, nil } -// applyShardMetricsUpdates updates shard metrics. +// applyShardStatisticsUpdates updates shard statistics. // Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { +func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { for _, update := range updates { update.metrics.LastMoveTime = update.desiredLastMove payload, err := json.Marshal(update.metrics) if err != nil { s.logger.Warn( - "failed to marshal shard metrics after assignment", + "failed to marshal shard statistics after assignment", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err), @@ -485,7 +485,7 @@ func (s *executorStoreImpl) applyShardMetricsUpdates(ctx context.Context, namesp if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { s.logger.Warn( - "failed to update shard metrics", + "failed to update shard statistics", tag.ShardNamespace(namespace), tag.ShardKey(update.shardID), tag.Error(err), From cc769bf154f2b545cd25fb307c52e023385edbdd Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 08:31:52 +0100 Subject: [PATCH 44/62] refactor(shard distributor): found a place where I forgot to rename to "statistics" Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index ea26fd4b510..ec71c263cc2 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -51,7 +51,7 @@ type shardStatisticsUpdate struct { type shardMetricsUpdate struct { key string shardID string - metrics store.ShardStatistics + stats store.ShardStatistics desiredLastMove int64 // intended LastMoveTime for this update } @@ -416,8 +416,8 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } -func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardMetricsUpdate, error) { - var updates []shardMetricsUpdate +func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardStatisticsUpdate, error) { + var updates []shardStatisticsUpdate for executorID, state := range newAssignments { for shardID := range state.AssignedShards { @@ -454,10 +454,10 @@ func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, n stats.LastUpdateTime = now } - updates = append(updates, shardMetricsUpdate{ + updates = append(updates, shardStatisticsUpdate{ key: shardStatisticsKey, shardID: shardID, - metrics: stats, + stats: stats, desiredLastMove: now, }) } @@ -468,11 +468,11 @@ func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, n // applyShardStatisticsUpdates updates shard statistics. // Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardMetricsUpdate) { +func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardStatisticsUpdate) { for _, update := range updates { - update.metrics.LastMoveTime = update.desiredLastMove + update.stats.LastMoveTime = update.desiredLastMove - payload, err := json.Marshal(update.metrics) + payload, err := json.Marshal(update.stats) if err != nil { s.logger.Warn( "failed to marshal shard statistics after assignment", From 8c22663cf77b47a4e83bd6cffe9c25200398fc1d Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:20:49 +0100 Subject: [PATCH 45/62] fix(shard distributor): move non-exported helpers to end of file to follow conventions Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 78 ------------------- 1 file changed, 78 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index ec71c263cc2..bbbadcd3e5c 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -416,84 +416,6 @@ func (s *executorStoreImpl) AssignShards(ctx context.Context, namespace string, return nil } -func (s *executorStoreImpl) prepareShardStatisticsUpdates(ctx context.Context, namespace string, newAssignments map[string]store.AssignedState) ([]shardStatisticsUpdate, error) { - var updates []shardStatisticsUpdate - - for executorID, state := range newAssignments { - for shardID := range state.AssignedShards { - now := s.timeSource.Now().Unix() - - oldOwner, err := s.shardCache.GetShardOwner(ctx, namespace, shardID) - if err != nil && !errors.Is(err, store.ErrShardNotFound) { - return nil, fmt.Errorf("lookup cached shard owner: %w", err) - } - - // we should just skip if the owner hasn't changed - if err == nil && oldOwner == executorID { - continue - } - - shardStatisticsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) - if err != nil { - return nil, fmt.Errorf("build shard statistics key: %w", err) - } - - statsResp, err := s.client.Get(ctx, shardStatisticsKey) - if err != nil { - return nil, fmt.Errorf("get shard statistics: %w", err) - } - - stats := store.ShardStatistics{} - - if len(statsResp.Kvs) > 0 { - if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { - return nil, fmt.Errorf("unmarshal shard statistics: %w", err) - } - } else { - stats.SmoothedLoad = 0 - stats.LastUpdateTime = now - } - - updates = append(updates, shardStatisticsUpdate{ - key: shardStatisticsKey, - shardID: shardID, - stats: stats, - desiredLastMove: now, - }) - } - } - - return updates, nil -} - -// applyShardStatisticsUpdates updates shard statistics. -// Is intentionally made tolerant of failures since the data is telemetry only. -func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, namespace string, updates []shardStatisticsUpdate) { - for _, update := range updates { - update.stats.LastMoveTime = update.desiredLastMove - - payload, err := json.Marshal(update.stats) - if err != nil { - s.logger.Warn( - "failed to marshal shard statistics after assignment", - tag.ShardNamespace(namespace), - tag.ShardKey(update.shardID), - tag.Error(err), - ) - continue - } - - if _, err := s.client.Put(ctx, update.key, string(payload)); err != nil { - s.logger.Warn( - "failed to update shard statistics", - tag.ShardNamespace(namespace), - tag.ShardKey(update.shardID), - tag.Error(err), - ) - } - } -} - func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID, executorID string) error { assignedState, err := etcdkeys.BuildExecutorKey(s.prefix, namespace, executorID, etcdkeys.ExecutorAssignedStateKey) if err != nil { From 5ac3c5db1cb737b7947794b4247a90b9d4cb56da Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:48:15 +0100 Subject: [PATCH 46/62] test(shard distributor): add test case for when shard stats are deleted Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../leader/process/processor_test.go | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index 1c6e0422caf..4f35d8072d1 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -254,6 +254,55 @@ func TestCleanupStaleShardStats(t *testing.T) { } +func TestCleanupStaleShardStats(t *testing.T) { + t.Run("stale shard stats are deleted", func(t *testing.T) { + mocks := setupProcessorTest(t, config.NamespaceTypeFixed) + defer mocks.ctrl.Finish() + processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) + + now := mocks.timeSource.Now() + + heartbeats := map[string]store.HeartbeatState{ + "exec-active": {LastHeartbeat: now.Unix(), Status: types.ExecutorStatusACTIVE}, + "exec-stale": {LastHeartbeat: now.Add(-2 * time.Second).Unix()}, + } + + assignments := map[string]store.AssignedState{ + "exec-active": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard-1": {Status: types.AssignmentStatusREADY}, + "shard-2": {Status: types.AssignmentStatusREADY}, + }, + }, + "exec-stale": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard-3": {Status: types.AssignmentStatusREADY}, + }, + }, + } + + shardStats := map[string]store.ShardStatistics{ + "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + } + + namespaceState := &store.NamespaceState{ + Executors: heartbeats, + ShardAssignments: assignments, + ShardStats: shardStats, + } + + gomock.InOrder( + mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(namespaceState, nil), + mocks.election.EXPECT().Guard().Return(store.NopGuard()), + mocks.store.EXPECT().DeleteShardStats(gomock.Any(), mocks.cfg.Name, []string{"shard-3"}, gomock.Any()).Return(nil), + ) + processor.cleanupStaleShardStats(context.Background()) + }) + +} + func TestRebalance_StoreErrors(t *testing.T) { mocks := setupProcessorTest(t, config.NamespaceTypeFixed) defer mocks.ctrl.Finish() From 3973b82781f9cf90f5d38ffafc446e90896f1292 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 30 Oct 2025 09:33:01 +0100 Subject: [PATCH 47/62] feat(shard distributor): retain shard stats while shards are within heartbeat TTL Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../leader/process/processor_test.go | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index 4f35d8072d1..0e7cede9481 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -284,7 +284,7 @@ func TestCleanupStaleShardStats(t *testing.T) { shardStats := map[string]store.ShardStatistics{ "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second).Unix(), LastMoveTime: now.Add(-2 * time.Second).Unix()}, } namespaceState := &store.NamespaceState{ @@ -301,6 +301,30 @@ func TestCleanupStaleShardStats(t *testing.T) { processor.cleanupStaleShardStats(context.Background()) }) + t.Run("recent shard stats are preserved", func(t *testing.T) { + mocks := setupProcessorTest(t, config.NamespaceTypeFixed) + defer mocks.ctrl.Finish() + processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) + + now := mocks.timeSource.Now() + + expiredExecutor := now.Add(-2 * time.Second).Unix() + state := &store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "exec-stale": {LastHeartbeat: expiredExecutor}, + }, + ShardAssignments: map[string]store.AssignedState{}, + ShardStats: map[string]store.ShardStatistics{ + "shard-1": {SmoothedLoad: 5.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, + }, + } + + mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(state, nil) + processor.cleanupStaleShardStats(context.Background()) + + // No delete expected since stats are recent. + }) + } func TestRebalance_StoreErrors(t *testing.T) { From 3830d5ede91c7f1ff87102eaba67b9f0ddc2755a Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:26:41 +0100 Subject: [PATCH 48/62] feat: function to update shard statistics from heartbeat (currently no ewma) Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index bbbadcd3e5c..94783bd4eda 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -8,6 +8,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "strconv" "time" @@ -162,9 +163,94 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec if err != nil { return fmt.Errorf("record heartbeat: %w", err) } + + s.updateShardStatisticsFromHeartbeat(ctx, namespace, executorID, request.ReportedShards) + return nil } +func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { + if len(reported) == 0 { + return + } + + now := s.timeSource.Now().Unix() + + for shardID, report := range reported { + if report == nil { + continue + } + + load := report.ShardLoad + if math.IsNaN(load) || math.IsInf(load, 0) { + continue + } + + shardStatsKey, err := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey) + if err != nil { + s.logger.Warn( + "failed to build shard statistics key from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + statsResp, err := s.client.Get(ctx, shardStatsKey) + if err != nil { + s.logger.Warn( + "failed to read shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + var stats store.ShardStatistics + if len(statsResp.Kvs) > 0 { + if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + s.logger.Warn( + "failed to unmarshal shard statistics for heartbeat update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + } + + stats.SmoothedLoad = load + stats.LastUpdateTime = now + + payload, err := json.Marshal(stats) + if err != nil { + s.logger.Warn( + "failed to marshal shard statistics after heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + continue + } + + if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil { + s.logger.Warn( + "failed to persist shard statistics from heartbeat", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + tag.Error(err), + ) + } + } +} + // GetHeartbeat retrieves the last known heartbeat state for a single executor. func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, executorID string) (*store.HeartbeatState, *store.AssignedState, error) { // The prefix for all keys related to a single executor. From 443c0b14c6e16a847277038d17696d14e80fc98d Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:29:08 +0100 Subject: [PATCH 49/62] test(shard distributor): add tests to verify statistics are updated at heartbeat Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../etcd/executorstore/etcdstore_test.go | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index a47565768bd..f7476cb04a1 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -91,6 +91,93 @@ func TestRecordHeartbeat(t *testing.T) { assert.Equal(t, "value-2", string(resp.Kvs[0].Value)) } +func TestRecordHeartbeatUpdatesShardStatistics(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-shard-stats" + shardID := "shard-with-load" + + initialStats := store.ShardStatistics{ + SmoothedLoad: 1.23, + LastUpdateTime: 10, + LastMoveTime: 123, + } + + shardStatsKey, err := etcdkeys.BuildShardKey(tc.EtcdPrefix, tc.Namespace, shardID, etcdkeys.ShardStatisticsKey) + require.NoError(t, err) + payload, err := json.Marshal(initialStats) + require.NoError(t, err) + _, err = tc.Client.Put(ctx, shardStatsKey, string(payload)) + require.NoError(t, err) + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: { + Status: types.ShardStatusREADY, + ShardLoad: 45.6, + }, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, shardID) + updated := nsState.ShardStats[shardID] + + assert.InDelta(t, 45.6, updated.SmoothedLoad, 1e-9) + assert.GreaterOrEqual(t, updated.LastUpdateTime, nowTS) + assert.Equal(t, initialStats.LastMoveTime, updated.LastMoveTime) +} + +func TestRecordHeartbeatSkipsShardStatisticsWithNilReport(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + executorStore := createStore(t, tc) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-missing-load" + validShardID := "shard-with-valid-load" + skippedShardID := "shard-missing-load" + + nowTS := time.Now().Unix() + + req := store.HeartbeatState{ + LastHeartbeat: nowTS, + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + validShardID: { + Status: types.ShardStatusREADY, + ShardLoad: 3.21, + }, + skippedShardID: nil, + }, + } + + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, req)) + + nsState, err := executorStore.GetState(ctx, tc.Namespace) + require.NoError(t, err) + + require.Contains(t, nsState.ShardStats, validShardID) + validStats := nsState.ShardStats[validShardID] + assert.InDelta(t, 3.21, validStats.SmoothedLoad, 1e-9) + assert.Greater(t, validStats.LastUpdateTime, int64(0)) + + assert.NotContains(t, nsState.ShardStats, skippedShardID) +} + func TestGetHeartbeat(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) From 9d159e744b57eff0064ab9a657db9d3350f61029 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:27:47 +0100 Subject: [PATCH 50/62] feat(shard distributor): calculate smoothed load (ewma) using the ShardStatistics Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 94783bd4eda..edb257afe2a 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -224,7 +224,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte } } - stats.SmoothedLoad = load + // Update smoothed load via EWMA. + stats.SmoothedLoad = ewmaSmoothedLoad(stats.SmoothedLoad, load, stats.LastUpdateTime, now) stats.LastUpdateTime = now payload, err := json.Marshal(stats) @@ -812,3 +813,20 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } } } + +// ewmaSmoothedLoad computes an EWMA for shard load. +func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { + const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower + if lastUpdate <= 0 { + return current + } + dt := now - lastUpdate + if dt < 0 { + dt = 0 + } + if tauSeconds <= 0 { + return current + } + alpha := 1 - math.Exp(-float64(dt)/tauSeconds) + return (1-alpha)*prev + alpha*current +} From 18e63b7049d5eb391cd54b9cf94fed7e148dfae7 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:52:59 +0100 Subject: [PATCH 51/62] fix(shard distributor): log invalid shard load Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../sharddistributor/store/etcd/executorstore/etcdstore.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index edb257afe2a..eee95defbe4 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -183,6 +183,12 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte load := report.ShardLoad if math.IsNaN(load) || math.IsInf(load, 0) { + s.logger.Warn( + "invalid shard load reported; skipping EWMA update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + ) continue } From e08a28644383227c5cdb490796093f1b2961bd23 Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Sun, 2 Nov 2025 17:00:02 +0100 Subject: [PATCH 52/62] chore: added logger warning and simplified ewma calculation Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index eee95defbe4..124d7e951d3 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -178,6 +178,11 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte for shardID, report := range reported { if report == nil { + s.logger.Warn("empty report; skipping EWMA update", + tag.ShardNamespace(namespace), + tag.ShardExecutor(executorID), + tag.ShardKey(shardID), + ) continue } @@ -820,19 +825,12 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } } -// ewmaSmoothedLoad computes an EWMA for shard load. func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower - if lastUpdate <= 0 { - return current - } - dt := now - lastUpdate - if dt < 0 { - dt = 0 - } - if tauSeconds <= 0 { + if lastUpdate <= 0 || tauSeconds <= 0 { return current } + dt := max(now-lastUpdate, 0) alpha := 1 - math.Exp(-float64(dt)/tauSeconds) return (1-alpha)*prev + alpha*current } From 08eb635dcc6e29096b5095bf8493e2d3d513136c Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:52:56 +0100 Subject: [PATCH 53/62] fix: remove duplicate test introduced in merge Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Signed-off-by: Theis Randeris Mathiassen --- .../leader/process/processor_test.go | 73 ------------------- 1 file changed, 73 deletions(-) diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index 0e7cede9481..1c6e0422caf 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -254,79 +254,6 @@ func TestCleanupStaleShardStats(t *testing.T) { } -func TestCleanupStaleShardStats(t *testing.T) { - t.Run("stale shard stats are deleted", func(t *testing.T) { - mocks := setupProcessorTest(t, config.NamespaceTypeFixed) - defer mocks.ctrl.Finish() - processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) - - now := mocks.timeSource.Now() - - heartbeats := map[string]store.HeartbeatState{ - "exec-active": {LastHeartbeat: now.Unix(), Status: types.ExecutorStatusACTIVE}, - "exec-stale": {LastHeartbeat: now.Add(-2 * time.Second).Unix()}, - } - - assignments := map[string]store.AssignedState{ - "exec-active": { - AssignedShards: map[string]*types.ShardAssignment{ - "shard-1": {Status: types.AssignmentStatusREADY}, - "shard-2": {Status: types.AssignmentStatusREADY}, - }, - }, - "exec-stale": { - AssignedShards: map[string]*types.ShardAssignment{ - "shard-3": {Status: types.AssignmentStatusREADY}, - }, - }, - } - - shardStats := map[string]store.ShardStatistics{ - "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second).Unix(), LastMoveTime: now.Add(-2 * time.Second).Unix()}, - } - - namespaceState := &store.NamespaceState{ - Executors: heartbeats, - ShardAssignments: assignments, - ShardStats: shardStats, - } - - gomock.InOrder( - mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(namespaceState, nil), - mocks.election.EXPECT().Guard().Return(store.NopGuard()), - mocks.store.EXPECT().DeleteShardStats(gomock.Any(), mocks.cfg.Name, []string{"shard-3"}, gomock.Any()).Return(nil), - ) - processor.cleanupStaleShardStats(context.Background()) - }) - - t.Run("recent shard stats are preserved", func(t *testing.T) { - mocks := setupProcessorTest(t, config.NamespaceTypeFixed) - defer mocks.ctrl.Finish() - processor := mocks.factory.CreateProcessor(mocks.cfg, mocks.store, mocks.election).(*namespaceProcessor) - - now := mocks.timeSource.Now() - - expiredExecutor := now.Add(-2 * time.Second).Unix() - state := &store.NamespaceState{ - Executors: map[string]store.HeartbeatState{ - "exec-stale": {LastHeartbeat: expiredExecutor}, - }, - ShardAssignments: map[string]store.AssignedState{}, - ShardStats: map[string]store.ShardStatistics{ - "shard-1": {SmoothedLoad: 5.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - }, - } - - mocks.store.EXPECT().GetState(gomock.Any(), mocks.cfg.Name).Return(state, nil) - processor.cleanupStaleShardStats(context.Background()) - - // No delete expected since stats are recent. - }) - -} - func TestRebalance_StoreErrors(t *testing.T) { mocks := setupProcessorTest(t, config.NamespaceTypeFixed) defer mocks.ctrl.Finish() From f63664aa6ea87665204304540fa57c1e4faa215d Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Tue, 11 Nov 2025 11:16:15 +0100 Subject: [PATCH 54/62] chore: consistent error checking, and rename function Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 124d7e951d3..915d3f36ba9 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -164,12 +164,12 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec return fmt.Errorf("record heartbeat: %w", err) } - s.updateShardStatisticsFromHeartbeat(ctx, namespace, executorID, request.ReportedShards) + s.recordShardStatistics(ctx, namespace, executorID, request.ReportedShards) return nil } -func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { +func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { if len(reported) == 0 { return } @@ -223,7 +223,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte var stats store.ShardStatistics if len(statsResp.Kvs) > 0 { - if err := json.Unmarshal(statsResp.Kvs[0].Value, &stats); err != nil { + err := json.Unmarshal(statsResp.Kvs[0].Value, &stats) + if err != nil { s.logger.Warn( "failed to unmarshal shard statistics for heartbeat update", tag.ShardNamespace(namespace), @@ -251,7 +252,8 @@ func (s *executorStoreImpl) updateShardStatisticsFromHeartbeat(ctx context.Conte continue } - if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil { + _, err = s.client.Put(ctx, shardStatsKey, string(payload)) + if err != nil { s.logger.Warn( "failed to persist shard statistics from heartbeat", tag.ShardNamespace(namespace), @@ -826,7 +828,7 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam } func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { - const tauSeconds = 30.0 // smaller = more responsive, larger = smoother, slower + const tauSeconds = 30.0 // smaller = more responsive, larger = smoother if lastUpdate <= 0 || tauSeconds <= 0 { return current } From 10e2ffa9bd28d8fa2c2e4b54cac38ffc82a89efa Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Tue, 11 Nov 2025 12:07:23 +0100 Subject: [PATCH 55/62] chore: added decompress to unmarshal Signed-off-by: Theis Randeris Mathiassen --- service/sharddistributor/store/etcd/executorstore/etcdstore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 915d3f36ba9..8e485b5045a 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -223,7 +223,7 @@ func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace var stats store.ShardStatistics if len(statsResp.Kvs) > 0 { - err := json.Unmarshal(statsResp.Kvs[0].Value, &stats) + err := common.DecompressAndUnmarshal(statsResp.Kvs[0].Value, &stats) if err != nil { s.logger.Warn( "failed to unmarshal shard statistics for heartbeat update", From 8c6b0c8dcba80b13a91455eca4c53c95c617f897 Mon Sep 17 00:00:00 2001 From: Theis Randeris Mathiassen Date: Tue, 11 Nov 2025 15:46:47 +0100 Subject: [PATCH 56/62] chore: removed an old struct that appeared during rebase Signed-off-by: Theis Randeris Mathiassen --- .../store/etcd/executorstore/etcdstore.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 8e485b5045a..8d721450a5b 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -47,15 +47,6 @@ type shardStatisticsUpdate struct { desiredLastMove int64 // intended LastMoveTime for this update } -// shardMetricsUpdate tracks the etcd key, revision, and metrics used to update a shard -// after the main transaction in AssignShards for exec state. -type shardMetricsUpdate struct { - key string - shardID string - stats store.ShardStatistics - desiredLastMove int64 // intended LastMoveTime for this update -} - // ExecutorStoreParams defines the dependencies for the etcd store, for use with fx. type ExecutorStoreParams struct { fx.In From 158e0307b89cd5d3f805972c41e9d4f41648967d Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 13 Nov 2025 13:24:04 +0100 Subject: [PATCH 57/62] feat(shard distributor): throttle shard-stat writes Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore.go | 50 +++++++++-- .../etcd/executorstore/etcdstore_test.go | 85 +++++++++++++++++++ 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 8d721450a5b..7dcb6698f82 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -36,8 +36,15 @@ type executorStoreImpl struct { logger log.Logger shardCache *shardcache.ShardToExecutorCache timeSource clock.TimeSource + // Max interval (seconds) before we force a shard-stat persist. + maxStatsPersistIntervalSeconds int64 } +// Constants for gating shard statistics writes to reduce etcd load. +const ( + shardStatsEpsilon = 0.05 +) + // shardStatisticsUpdate holds the staged statistics for a shard so we can write them // to etcd after the main AssignShards transaction commits. type shardStatisticsUpdate struct { @@ -90,11 +97,12 @@ func NewStore(p ExecutorStoreParams) (store.Store, error) { } store := &executorStoreImpl{ - client: etcdClient, - prefix: etcdCfg.Prefix, - logger: p.Logger, - shardCache: shardCache, - timeSource: timeSource, + client: etcdClient, + prefix: etcdCfg.Prefix, + logger: p.Logger, + shardCache: shardCache, + timeSource: timeSource, + maxStatsPersistIntervalSeconds: deriveStatsPersistInterval(p.Cfg.Process.HeartbeatTTL), } p.Lifecycle.Append(fx.StartStopHook(store.Start, store.Stop)) @@ -160,6 +168,15 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec return nil } +func deriveStatsPersistInterval(heartbeatTTL time.Duration) int64 { + ttlSeconds := int64(heartbeatTTL.Seconds()) + interval := ttlSeconds - 1 + if interval < 1 { + interval = 1 + } + return interval +} + func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { if len(reported) == 0 { return @@ -228,7 +245,28 @@ func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace } // Update smoothed load via EWMA. - stats.SmoothedLoad = ewmaSmoothedLoad(stats.SmoothedLoad, load, stats.LastUpdateTime, now) + prevSmoothed := stats.SmoothedLoad + prevUpdate := stats.LastUpdateTime + newSmoothed := ewmaSmoothedLoad(prevSmoothed, load, prevUpdate, now) + + // Decide whether to persist this update. We always persist if this is the + // first observation (prevUpdate == 0). Otherwise, if the change is small + // and the previous persist is recent, skip the write to reduce etcd load. + shouldPersist := true + if prevUpdate > 0 { + age := now - prevUpdate + delta := math.Abs(newSmoothed - prevSmoothed) + if delta < shardStatsEpsilon && age < s.maxStatsPersistIntervalSeconds { + shouldPersist = false + } + } + + if !shouldPersist { + // Skip persisting, proceed to next shard. + continue + } + + stats.SmoothedLoad = newSmoothed stats.LastUpdateTime = now payload, err := json.Marshal(stats) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index f7476cb04a1..43dc4204d65 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/fx/fxtest" + "github.com/uber/cadence/common/clock" "github.com/uber/cadence/common/log/testlogger" "github.com/uber/cadence/common/types" "github.com/uber/cadence/service/sharddistributor/store" @@ -178,6 +179,66 @@ func TestRecordHeartbeatSkipsShardStatisticsWithNilReport(t *testing.T) { assert.NotContains(t, nsState.ShardStats, skippedShardID) } +func TestRecordHeartbeatShardStatisticsThrottlesWrites(t *testing.T) { + tc := testhelper.SetupStoreTestCluster(t) + tc.LeaderCfg.Process.HeartbeatTTL = 10 * time.Second + mockTS := clock.NewMockedTimeSourceAt(time.Unix(1000, 0)) + executorStore := createStoreWithTimeSource(t, tc, mockTS) + esImpl, ok := executorStore.(*executorStoreImpl) + require.True(t, ok, "unexpected store implementation") + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + executorID := "executor-shard-stats-throttle" + shardID := "shard-stats-throttle" + + baseLoad := 0.40 + smallDelta := shardStatsEpsilon / 2 + intervalSeconds := esImpl.maxStatsPersistIntervalSeconds + halfIntervalSeconds := intervalSeconds / 2 + if halfIntervalSeconds == 0 { + halfIntervalSeconds = 1 + } + + // First heartbeat should always persist stats. + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, store.HeartbeatState{ + LastHeartbeat: mockTS.Now().Unix(), + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad}, + }, + })) + statsAfterFirst := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + require.NotNil(t, statsAfterFirst) + + // Advance time by less than the persist interval and provide a small delta: should skip the write. + mockTS.Advance(time.Duration(halfIntervalSeconds) * time.Second) + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, store.HeartbeatState{ + LastHeartbeat: mockTS.Now().Unix(), + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad + smallDelta}, + }, + })) + statsAfterSkip := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + require.NotNil(t, statsAfterSkip) + assert.Equal(t, statsAfterFirst.LastUpdateTime, statsAfterSkip.LastUpdateTime, "small recent deltas should not trigger a persist") + + // Advance time beyond the max persist interval, even small deltas should now persist. + mockTS.Advance(time.Duration(intervalSeconds) * time.Second) + require.NoError(t, executorStore.RecordHeartbeat(ctx, tc.Namespace, executorID, store.HeartbeatState{ + LastHeartbeat: mockTS.Now().Unix(), + Status: types.ExecutorStatusACTIVE, + ReportedShards: map[string]*types.ShardStatusReport{ + shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad + smallDelta/2}, + }, + })) + statsAfterForce := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + require.NotNil(t, statsAfterForce) + assert.Greater(t, statsAfterForce.LastUpdateTime, statsAfterSkip.LastUpdateTime, "stale stats must be refreshed even if delta is small") +} + func TestGetHeartbeat(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) executorStore := createStore(t, tc) @@ -695,3 +756,27 @@ func createStore(t *testing.T, tc *testhelper.StoreTestCluster) store.Store { require.NoError(t, err) return store } + +func createStoreWithTimeSource(t *testing.T, tc *testhelper.StoreTestCluster, ts clock.TimeSource) store.Store { + t.Helper() + store, err := NewStore(ExecutorStoreParams{ + Client: tc.Client, + Cfg: tc.LeaderCfg, + Lifecycle: fxtest.NewLifecycle(t), + Logger: testlogger.New(t), + TimeSource: ts, + }) + require.NoError(t, err) + return store +} + +func getShardStats(t *testing.T, s store.Store, ctx context.Context, namespace, shardID string) *store.ShardStatistics { + t.Helper() + nsState, err := s.GetState(ctx, namespace) + require.NoError(t, err) + stats, ok := nsState.ShardStats[shardID] + if !ok { + return nil + } + return &stats +} From 05e0d1d29716d068e7d60ad81e8753089e863989 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Thu, 13 Nov 2025 14:17:33 +0100 Subject: [PATCH 58/62] fix(shard distributor): linter error Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- .../store/etcd/executorstore/etcdstore_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index 43dc4204d65..51237d81ee5 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -209,7 +209,7 @@ func TestRecordHeartbeatShardStatisticsThrottlesWrites(t *testing.T) { shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad}, }, })) - statsAfterFirst := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + statsAfterFirst := getShardStats(ctx, t, executorStore, tc.Namespace, shardID) require.NotNil(t, statsAfterFirst) // Advance time by less than the persist interval and provide a small delta: should skip the write. @@ -221,7 +221,7 @@ func TestRecordHeartbeatShardStatisticsThrottlesWrites(t *testing.T) { shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad + smallDelta}, }, })) - statsAfterSkip := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + statsAfterSkip := getShardStats(ctx, t, executorStore, tc.Namespace, shardID) require.NotNil(t, statsAfterSkip) assert.Equal(t, statsAfterFirst.LastUpdateTime, statsAfterSkip.LastUpdateTime, "small recent deltas should not trigger a persist") @@ -234,7 +234,7 @@ func TestRecordHeartbeatShardStatisticsThrottlesWrites(t *testing.T) { shardID: {Status: types.ShardStatusREADY, ShardLoad: baseLoad + smallDelta/2}, }, })) - statsAfterForce := getShardStats(t, executorStore, ctx, tc.Namespace, shardID) + statsAfterForce := getShardStats(ctx, t, executorStore, tc.Namespace, shardID) require.NotNil(t, statsAfterForce) assert.Greater(t, statsAfterForce.LastUpdateTime, statsAfterSkip.LastUpdateTime, "stale stats must be refreshed even if delta is small") } @@ -770,7 +770,7 @@ func createStoreWithTimeSource(t *testing.T, tc *testhelper.StoreTestCluster, ts return store } -func getShardStats(t *testing.T, s store.Store, ctx context.Context, namespace, shardID string) *store.ShardStatistics { +func getShardStats(ctx context.Context, t *testing.T, s store.Store, namespace, shardID string) *store.ShardStatistics { t.Helper() nsState, err := s.GetState(ctx, namespace) require.NoError(t, err) From e0779ec94137f9a04bdb961b9e429993c6acd9e3 Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:18:44 +0100 Subject: [PATCH 59/62] feat(shard distributor): decouple shard stats write-throttling decision from heartbeat TTL Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- common/config/config.go | 5 +++-- config/development.yaml | 1 + service/sharddistributor/config/config.go | 9 +++++++-- .../leader/process/processor.go | 9 ++++++--- .../leader/process/processor_test.go | 7 ++++--- .../store/etcd/executorstore/etcdstore.go | 18 +++++++++++------- .../store/etcd/executorstore/etcdstore_test.go | 1 + 7 files changed, 33 insertions(+), 17 deletions(-) diff --git a/common/config/config.go b/common/config/config.go index 904a2f7eb8e..a9e510d8ad0 100644 --- a/common/config/config.go +++ b/common/config/config.go @@ -658,8 +658,9 @@ type ( } LeaderProcess struct { - Period time.Duration `yaml:"period"` - HeartbeatTTL time.Duration `yaml:"heartbeatTTL"` + Period time.Duration `yaml:"period"` + HeartbeatTTL time.Duration `yaml:"heartbeatTTL"` + ShardStatsTTL time.Duration `yaml:"shardStatsTTL"` } ) diff --git a/config/development.yaml b/config/development.yaml index d485c7ac4ba..1f34cb3d2a0 100644 --- a/config/development.yaml +++ b/config/development.yaml @@ -186,3 +186,4 @@ shardDistribution: process: period: 1s heartbeatTTL: 2s + shardStatsTTL: 60s diff --git a/service/sharddistributor/config/config.go b/service/sharddistributor/config/config.go index 79106ffc635..4ae80e489fc 100644 --- a/service/sharddistributor/config/config.go +++ b/service/sharddistributor/config/config.go @@ -79,8 +79,9 @@ type ( } LeaderProcess struct { - Period time.Duration `yaml:"period"` - HeartbeatTTL time.Duration `yaml:"heartbeatTTL"` + Period time.Duration `yaml:"period"` + HeartbeatTTL time.Duration `yaml:"heartbeatTTL"` + ShardStatsTTL time.Duration `yaml:"shardStatsTTL"` } ) @@ -97,6 +98,10 @@ const ( MigrationModeONBOARDED = "onboarded" ) +const ( + DefaultShardStatsTTL = time.Minute +) + // ConfigMode maps string migration mode values to types.MigrationMode var ConfigMode = map[string]types.MigrationMode{ MigrationModeINVALID: types.MigrationModeINVALID, diff --git a/service/sharddistributor/leader/process/processor.go b/service/sharddistributor/leader/process/processor.go index 5b9e717fb24..4eeca1c926e 100644 --- a/service/sharddistributor/leader/process/processor.go +++ b/service/sharddistributor/leader/process/processor.go @@ -75,12 +75,15 @@ func NewProcessorFactory( timeSource clock.TimeSource, cfg config.ShardDistribution, ) Factory { - if cfg.Process.Period == 0 { + if cfg.Process.Period <= 0 { cfg.Process.Period = _defaultPeriod } - if cfg.Process.HeartbeatTTL == 0 { + if cfg.Process.HeartbeatTTL <= 0 { cfg.Process.HeartbeatTTL = _defaultHearbeatTTL } + if cfg.Process.ShardStatsTTL <= 0 { + cfg.Process.ShardStatsTTL = config.DefaultShardStatsTTL + } return &processorFactory{ logger: logger, @@ -277,7 +280,7 @@ func (p *namespaceProcessor) cleanupStaleShardStats(ctx context.Context, namespa activeShards := make(map[string]struct{}) now := p.timeSource.Now().Unix() - shardStatsTTL := int64(p.cfg.HeartbeatTTL.Seconds()) + shardStatsTTL := int64(p.cfg.ShardStatsTTL.Seconds()) // 1. build set of active executors diff --git a/service/sharddistributor/leader/process/processor_test.go b/service/sharddistributor/leader/process/processor_test.go index 1c6e0422caf..3e8d208a08d 100644 --- a/service/sharddistributor/leader/process/processor_test.go +++ b/service/sharddistributor/leader/process/processor_test.go @@ -44,8 +44,9 @@ func setupProcessorTest(t *testing.T, namespaceType string) *testDependencies { mockedClock, config.ShardDistribution{ Process: config.LeaderProcess{ - Period: time.Second, - HeartbeatTTL: time.Second, + Period: time.Second, + HeartbeatTTL: time.Second, + ShardStatsTTL: 10 * time.Second, }, }, ), @@ -215,7 +216,7 @@ func TestCleanupStaleShardStats(t *testing.T) { shardStats := map[string]store.ShardStatistics{ "shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, "shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now.Unix(), LastMoveTime: now.Unix()}, - "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second).Unix(), LastMoveTime: now.Add(-2 * time.Second).Unix()}, + "shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-11 * time.Second).Unix(), LastMoveTime: now.Add(-11 * time.Second).Unix()}, } namespaceState := &store.NamespaceState{ diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index 7dcb6698f82..02d5edd5b1d 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -96,13 +96,18 @@ func NewStore(p ExecutorStoreParams) (store.Store, error) { timeSource = clock.NewRealTimeSource() } + shardStatsTTL := p.Cfg.Process.ShardStatsTTL + if shardStatsTTL <= 0 { + shardStatsTTL = config.DefaultShardStatsTTL + } + store := &executorStoreImpl{ client: etcdClient, prefix: etcdCfg.Prefix, logger: p.Logger, shardCache: shardCache, timeSource: timeSource, - maxStatsPersistIntervalSeconds: deriveStatsPersistInterval(p.Cfg.Process.HeartbeatTTL), + maxStatsPersistIntervalSeconds: deriveStatsPersistInterval(shardStatsTTL), } p.Lifecycle.Append(fx.StartStopHook(store.Start, store.Stop)) @@ -168,13 +173,12 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec return nil } -func deriveStatsPersistInterval(heartbeatTTL time.Duration) int64 { - ttlSeconds := int64(heartbeatTTL.Seconds()) - interval := ttlSeconds - 1 - if interval < 1 { - interval = 1 +func deriveStatsPersistInterval(shardStatsTTL time.Duration) int64 { + ttlSeconds := int64(shardStatsTTL.Seconds()) + if ttlSeconds <= 1 { + return 1 } - return interval + return ttlSeconds - 1 } func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) { diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go index 51237d81ee5..9d6d5c7f7e3 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore_test.go @@ -182,6 +182,7 @@ func TestRecordHeartbeatSkipsShardStatisticsWithNilReport(t *testing.T) { func TestRecordHeartbeatShardStatisticsThrottlesWrites(t *testing.T) { tc := testhelper.SetupStoreTestCluster(t) tc.LeaderCfg.Process.HeartbeatTTL = 10 * time.Second + tc.LeaderCfg.Process.ShardStatsTTL = 10 * time.Second mockTS := clock.NewMockedTimeSourceAt(time.Unix(1000, 0)) executorStore := createStoreWithTimeSource(t, tc, mockTS) esImpl, ok := executorStore.(*executorStoreImpl) From db70702b16d87465f48fcfbf133b5486c08c142b Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 19 Nov 2025 12:38:18 +0100 Subject: [PATCH 60/62] fix(shard-distributor): inverted condition in shard stats cleanup loop Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/leader/process/processor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/sharddistributor/leader/process/processor.go b/service/sharddistributor/leader/process/processor.go index fbb3cc82e50..9179357f07c 100644 --- a/service/sharddistributor/leader/process/processor.go +++ b/service/sharddistributor/leader/process/processor.go @@ -240,7 +240,7 @@ func (p *namespaceProcessor) runShardStatsCleanupLoop(ctx context.Context) { continue } staleShardStats := p.identifyStaleShardStats(namespaceState) - if len(staleShardStats) > 0 { + if len(staleShardStats) == 0 { // No stale shard stats to delete continue } From b5c277ab9f6fb8148ee6376ed2b9c9ad1e89852e Mon Sep 17 00:00:00 2001 From: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:45:44 +0100 Subject: [PATCH 61/62] feat(shard-distributor): only assign shards to ACTIVE executors and add+modify tests Modified pickLeastLoadedExecutor to skip executrs with non ACTIVE status when selecting an executor for shard assignment. Updated error message to reflect when no active executors are available. Add tests to cover this behavior. Adjust previous tests to also mock executors s.t. we get executors status Signed-off-by: Andreas Holt <6665487+AndreasHolt@users.noreply.github.com> --- service/sharddistributor/handler/handler.go | 9 +- .../sharddistributor/handler/handler_test.go | 91 ++++++++++++++++++- 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/service/sharddistributor/handler/handler.go b/service/sharddistributor/handler/handler.go index c6297df4869..b5c95fb51b6 100644 --- a/service/sharddistributor/handler/handler.go +++ b/service/sharddistributor/handler/handler.go @@ -159,7 +159,7 @@ func (h *handlerImpl) assignEphemeralShard(ctx context.Context, namespace string }, nil } -// pickLeastLoadedExecutor returns the executor with the minimal aggregated smoothed load. +// pickLeastLoadedExecutor returns the ACTIVE executor with the minimal aggregated smoothed load. // Ties are broken by fewer assigned shards. func pickLeastLoadedExecutor(state *store.NamespaceState) (executorID string, aggregatedLoad float64, assignedCount int, err error) { if state == nil || len(state.ShardAssignments) == 0 { @@ -173,6 +173,11 @@ func pickLeastLoadedExecutor(state *store.NamespaceState) (executorID string, ag minAssignedShards := math.MaxInt for candidate, assignment := range state.ShardAssignments { + executorState, ok := state.Executors[candidate] + if !ok || executorState.Status != types.ExecutorStatusACTIVE { + continue + } + aggregated := 0.0 for shard := range assignment.AssignedShards { if stats, ok := state.ShardStats[shard]; ok { @@ -193,7 +198,7 @@ func pickLeastLoadedExecutor(state *store.NamespaceState) (executorID string, ag } if chosenID == "" { - return "", 0, 0, fmt.Errorf("no executors in namespace state") + return "", 0, 0, fmt.Errorf("no active executors available") } return chosenID, chosenAggregatedLoad, chosenAssignedCount, nil diff --git a/service/sharddistributor/handler/handler_test.go b/service/sharddistributor/handler/handler_test.go index 9a9d2ded148..a5ca98c6969 100644 --- a/service/sharddistributor/handler/handler_test.go +++ b/service/sharddistributor/handler/handler_test.go @@ -138,6 +138,10 @@ func TestGetShardOwner(t *testing.T) { setupMocks: func(mockStore *store.MockStore) { mockStore.EXPECT().GetShardOwner(gomock.Any(), _testNamespaceEphemeral, "NON-EXISTING-SHARD").Return(nil, store.ErrShardNotFound) mockStore.EXPECT().GetState(gomock.Any(), _testNamespaceEphemeral).Return(&store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "owner1": {Status: types.ExecutorStatusACTIVE}, + "owner2": {Status: types.ExecutorStatusACTIVE}, + }, ShardAssignments: map[string]store.AssignedState{ "owner1": { AssignedShards: map[string]*types.ShardAssignment{ @@ -181,7 +185,13 @@ func TestGetShardOwner(t *testing.T) { setupMocks: func(mockStore *store.MockStore) { mockStore.EXPECT().GetShardOwner(gomock.Any(), _testNamespaceEphemeral, "NON-EXISTING-SHARD").Return(nil, store.ErrShardNotFound) mockStore.EXPECT().GetState(gomock.Any(), _testNamespaceEphemeral).Return(&store.NamespaceState{ - ShardAssignments: map[string]store.AssignedState{"owner1": {AssignedShards: map[string]*types.ShardAssignment{}}}}, nil) + Executors: map[string]store.HeartbeatState{ + "owner1": {Status: types.ExecutorStatusACTIVE}, + }, + ShardAssignments: map[string]store.AssignedState{ + "owner1": {AssignedShards: map[string]*types.ShardAssignment{}}, + }, + }, nil) mockStore.EXPECT().AssignShard(gomock.Any(), _testNamespaceEphemeral, "NON-EXISTING-SHARD", "owner1").Return(errors.New("assign shard failure")) }, expectedError: true, @@ -196,6 +206,10 @@ func TestGetShardOwner(t *testing.T) { setupMocks: func(mockStore *store.MockStore) { mockStore.EXPECT().GetShardOwner(gomock.Any(), _testNamespaceEphemeral, "new-shard").Return(nil, store.ErrShardNotFound) mockStore.EXPECT().GetState(gomock.Any(), _testNamespaceEphemeral).Return(&store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "owner1": {Status: types.ExecutorStatusACTIVE}, + "owner2": {Status: types.ExecutorStatusACTIVE}, + }, ShardAssignments: map[string]store.AssignedState{ "owner1": { AssignedShards: map[string]*types.ShardAssignment{ @@ -223,6 +237,30 @@ func TestGetShardOwner(t *testing.T) { expectedOwner: "owner2", expectedError: false, }, + { + name: "ShardNotFound_Ephemeral_AllExecutorsDraining", + request: &types.GetShardOwnerRequest{ + Namespace: _testNamespaceEphemeral, + ShardKey: "new-shard", + }, + setupMocks: func(mockStore *store.MockStore) { + mockStore.EXPECT().GetShardOwner(gomock.Any(), _testNamespaceEphemeral, "new-shard").Return(nil, store.ErrShardNotFound) + mockStore.EXPECT().GetState(gomock.Any(), _testNamespaceEphemeral).Return(&store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "owner1": {Status: types.ExecutorStatusDRAINING}, + }, + ShardAssignments: map[string]store.AssignedState{ + "owner1": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {Status: types.AssignmentStatusREADY}, + }, + }, + }, + }, nil) + }, + expectedError: true, + expectedErrMsg: "no active executors", + }, } for _, tt := range tests { @@ -266,6 +304,10 @@ func TestPickLeastLoadedExecutor(t *testing.T) { { name: "SelectsLeastLoaded", state: &store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "exec1": {Status: types.ExecutorStatusACTIVE}, + "exec2": {Status: types.ExecutorStatusACTIVE}, + }, ShardAssignments: map[string]store.AssignedState{ "exec1": { AssignedShards: map[string]*types.ShardAssignment{ @@ -295,11 +337,58 @@ func TestPickLeastLoadedExecutor(t *testing.T) { expectedCount: 2, expectedError: false, }, + { + name: "SkipsNonActiveExecutors", + state: &store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "exec1": {Status: types.ExecutorStatusDRAINING}, + "exec2": {Status: types.ExecutorStatusACTIVE}, + }, + ShardAssignments: map[string]store.AssignedState{ + "exec1": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {}, + }, + }, + "exec2": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard2": {}, + "shard3": {}, + }, + }, + }, + ShardStats: map[string]store.ShardStatistics{ + "shard1": {SmoothedLoad: 0.1}, + "shard2": {SmoothedLoad: 1.0}, + "shard3": {SmoothedLoad: 2.0}, + }, + }, + expectedOwner: "exec2", + expectedLoad: 3.0, + expectedCount: 2, + expectedError: false, + }, { name: "SelectsLeastLoaded_NoExecutors", state: &store.NamespaceState{}, expectedError: true, }, + { + name: "SelectsLeastLoaded_NoActiveExecutors", + state: &store.NamespaceState{ + Executors: map[string]store.HeartbeatState{ + "exec1": {Status: types.ExecutorStatusDRAINING}, + }, + ShardAssignments: map[string]store.AssignedState{ + "exec1": { + AssignedShards: map[string]*types.ShardAssignment{ + "shard1": {}, + }, + }, + }, + }, + expectedError: true, + }, { name: "SelectsLeastLoaded_NoShards", state: &store.NamespaceState{ From 6d32bb82cc3d3e274228cb96824a71db5a37feb1 Mon Sep 17 00:00:00 2001 From: Theis Mathiassen Date: Thu, 20 Nov 2025 10:13:38 +0100 Subject: [PATCH 62/62] fix: added guard to check if prev load is in invalid state Signed-off-by: Theis Mathiassen --- service/sharddistributor/handler/handler.go | 7 +++++-- .../sharddistributor/store/etcd/executorstore/etcdstore.go | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/service/sharddistributor/handler/handler.go b/service/sharddistributor/handler/handler.go index b5c95fb51b6..ab053160463 100644 --- a/service/sharddistributor/handler/handler.go +++ b/service/sharddistributor/handler/handler.go @@ -162,8 +162,11 @@ func (h *handlerImpl) assignEphemeralShard(ctx context.Context, namespace string // pickLeastLoadedExecutor returns the ACTIVE executor with the minimal aggregated smoothed load. // Ties are broken by fewer assigned shards. func pickLeastLoadedExecutor(state *store.NamespaceState) (executorID string, aggregatedLoad float64, assignedCount int, err error) { - if state == nil || len(state.ShardAssignments) == 0 { - return "", 0, 0, fmt.Errorf("namespace state is nil or has no executors") + if state == nil { + return "", 0, 0, fmt.Errorf("namespace state is nil") + } + if len(state.ShardAssignments) == 0 { + return "", 0, 0, fmt.Errorf("namespace state has no executors") } var chosenID string diff --git a/service/sharddistributor/store/etcd/executorstore/etcdstore.go b/service/sharddistributor/store/etcd/executorstore/etcdstore.go index db511f53b23..f509801ca16 100644 --- a/service/sharddistributor/store/etcd/executorstore/etcdstore.go +++ b/service/sharddistributor/store/etcd/executorstore/etcdstore.go @@ -891,5 +891,8 @@ func ewmaSmoothedLoad(prev, current float64, lastUpdate, now int64) float64 { } dt := max(now-lastUpdate, 0) alpha := 1 - math.Exp(-float64(dt)/tauSeconds) + if math.IsNaN(prev) || math.IsInf(prev, 0) { + return current + } return (1-alpha)*prev + alpha*current }