|
| 1 | +package io.envoyproxy.controlplane.server.callback; |
| 2 | + |
| 3 | +import com.google.common.annotations.VisibleForTesting; |
| 4 | +import com.google.common.util.concurrent.ThreadFactoryBuilder; |
| 5 | +import envoy.api.v2.Discovery; |
| 6 | +import io.envoyproxy.controlplane.cache.NodeGroup; |
| 7 | +import io.envoyproxy.controlplane.cache.Snapshot; |
| 8 | +import io.envoyproxy.controlplane.cache.SnapshotCache; |
| 9 | +import io.envoyproxy.controlplane.server.DiscoveryServerCallbacks; |
| 10 | +import java.time.Clock; |
| 11 | +import java.time.Instant; |
| 12 | +import java.time.temporal.ChronoUnit; |
| 13 | +import java.util.LinkedHashSet; |
| 14 | +import java.util.Map; |
| 15 | +import java.util.Set; |
| 16 | +import java.util.concurrent.ConcurrentHashMap; |
| 17 | +import java.util.concurrent.Executors; |
| 18 | +import java.util.concurrent.ScheduledExecutorService; |
| 19 | +import java.util.concurrent.TimeUnit; |
| 20 | +import java.util.function.Consumer; |
| 21 | + |
| 22 | +/** |
| 23 | + * Callback that keeps track of the number of streams associated with each node group and periodically clears |
| 24 | + * out {@link Snapshot}s from the cache that are no longer referenced by any streams. |
| 25 | + * |
| 26 | + * <p>Works by monitoring the stream to determine what group they belong to and keeps a running count as well |
| 27 | + * as when a request is seen that targets a given node group. |
| 28 | + * |
| 29 | + * <p>Every {@code collectionIntervalMillis} milliseconds a cleanup job runs which looks for snapshots with no |
| 30 | + * active streams that haven't been updated within the configured time frame. Checking the time since last update |
| 31 | + * is done to prevent snapshots from being prematurely removed from the cache. It ensures that a group must have |
| 32 | + * no active streams for {@code collectAfterMillis} milliseconds before being collected. |
| 33 | + * |
| 34 | + * <p>To be notified of snapshots that are removed, a set of callbacks may be provided which will be triggered |
| 35 | + * whenever a snapshot is removed from the cache. Any other callback which maintains state about the snapshots |
| 36 | + * that is cleaned up by one of these callbacks should be run *after* this callback. This helps ensure that |
| 37 | + * if state is cleaned up while a request in inbound, the request will be blocked by the lock in this callback |
| 38 | + * until collection finishes and the subsequent callbacks will see the new request come in after collection. If the |
| 39 | + * order is reversed, another callback might have seen the new request but the refcount here hasn't been incremented, |
| 40 | + * causing it to get cleaned up and wipe the state of the other callback even though we now have an active stream |
| 41 | + * for that group. |
| 42 | + */ |
| 43 | +public class SnapshotCollectingCallback<T> implements DiscoveryServerCallbacks { |
| 44 | + private static class SnapshotState { |
| 45 | + int streamCount; |
| 46 | + Instant lastSeen; |
| 47 | + } |
| 48 | + |
| 49 | + private final SnapshotCache<T> snapshotCache; |
| 50 | + private final NodeGroup<T> nodeGroup; |
| 51 | + private final Clock clock; |
| 52 | + private final Set<Consumer<T>> collectorCallbacks; |
| 53 | + private final long collectAfterMillis; |
| 54 | + private final Map<T, SnapshotState> snapshotStates = new ConcurrentHashMap<>(); |
| 55 | + private final Map<Long, T> groupByStream = new ConcurrentHashMap<>(); |
| 56 | + |
| 57 | + /** |
| 58 | + * Creates the callback. |
| 59 | + * |
| 60 | + * @param snapshotCache the cache to evict snapshots from |
| 61 | + * @param nodeGroup the node group used to map requests to groups |
| 62 | + * @param clock system clock |
| 63 | + * @param collectorCallbacks the callbacks to invoke when snapshot is collected |
| 64 | + * @param collectAfterMillis how long a snapshot must be referenced for before being collected |
| 65 | + * @param collectionIntervalMillis how often the collection background action should run |
| 66 | + */ |
| 67 | + public SnapshotCollectingCallback(SnapshotCache<T> snapshotCache, |
| 68 | + NodeGroup<T> nodeGroup, Clock clock, Set<Consumer<T>> collectorCallbacks, |
| 69 | + long collectAfterMillis, long collectionIntervalMillis) { |
| 70 | + this.snapshotCache = snapshotCache; |
| 71 | + this.nodeGroup = nodeGroup; |
| 72 | + this.clock = clock; |
| 73 | + this.collectorCallbacks = collectorCallbacks; |
| 74 | + this.collectAfterMillis = collectAfterMillis; |
| 75 | + ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor( |
| 76 | + new ThreadFactoryBuilder().setNameFormat("snapshot-gc-%d").build()); |
| 77 | + executorService.scheduleAtFixedRate(() -> deleteUnreferenced(clock), collectionIntervalMillis, |
| 78 | + collectionIntervalMillis, TimeUnit.MILLISECONDS); |
| 79 | + } |
| 80 | + |
| 81 | + @Override |
| 82 | + public synchronized void onStreamRequest(long streamId, Discovery.DiscoveryRequest request) { |
| 83 | + T groupIdentifier = nodeGroup.hash(request.getNode()); |
| 84 | + |
| 85 | + SnapshotState snapshotState = |
| 86 | + this.snapshotStates.computeIfAbsent(groupIdentifier, x -> new SnapshotState()); |
| 87 | + snapshotState.lastSeen = clock.instant(); |
| 88 | + |
| 89 | + if (groupByStream.put(streamId, groupIdentifier) == null) { |
| 90 | + snapshotState.streamCount++; |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + @Override public void onStreamClose(long streamId, String typeUrl) { |
| 95 | + onStreamCloseHelper(streamId); |
| 96 | + } |
| 97 | + |
| 98 | + @Override public void onStreamCloseWithError(long streamId, String typeUrl, Throwable error) { |
| 99 | + onStreamCloseHelper(streamId); |
| 100 | + } |
| 101 | + |
| 102 | + @VisibleForTesting |
| 103 | + synchronized void deleteUnreferenced(Clock clock) { |
| 104 | + // Keep track of snapshots to delete to avoid CME. |
| 105 | + Set<T> toDelete = new LinkedHashSet<>(); |
| 106 | + |
| 107 | + for (Map.Entry<T, SnapshotState> entry : snapshotStates.entrySet()) { |
| 108 | + if (entry.getValue().streamCount == 0 && entry.getValue().lastSeen.isBefore( |
| 109 | + clock.instant().minus(collectAfterMillis, ChronoUnit.MILLIS))) { |
| 110 | + |
| 111 | + // clearSnapshot will do nothing and return false if there are any pending watches - this |
| 112 | + // ensures that we don't actually remove a snapshot that's in use. |
| 113 | + T groupIdentifier = entry.getKey(); |
| 114 | + if (snapshotCache.clearSnapshot(groupIdentifier)) { |
| 115 | + toDelete.add(groupIdentifier); |
| 116 | + } |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + toDelete.forEach(group -> { |
| 121 | + snapshotStates.remove(group); |
| 122 | + collectorCallbacks.forEach(cb -> cb.accept(group)); |
| 123 | + }); |
| 124 | + } |
| 125 | + |
| 126 | + private synchronized void onStreamCloseHelper(long streamId) { |
| 127 | + T removed = groupByStream.remove(streamId); |
| 128 | + if (removed == null) { |
| 129 | + // This will happen if the stream closed before we received the first request. |
| 130 | + return; |
| 131 | + } |
| 132 | + |
| 133 | + SnapshotState snapshotState = snapshotStates.get(removed); |
| 134 | + snapshotState.streamCount--; |
| 135 | + snapshotState.lastSeen = clock.instant(); |
| 136 | + } |
| 137 | +} |
0 commit comments