@@ -225,7 +225,6 @@ NodeManager::NodeManager(
225225 cluster_lease_manager_ (cluster_lease_manager),
226226 record_metrics_period_ms_ (config.record_metrics_period_ms),
227227 placement_group_resource_manager_ (placement_group_resource_manager),
228- next_resource_seq_no_ (0 ),
229228 ray_syncer_ (io_service_, self_node_id_.Binary()),
230229 worker_killing_policy_ (std::make_shared<GroupByOwnerIdWorkerKillingPolicy>()),
231230 memory_monitor_ (std::make_unique<MemoryMonitor>(
@@ -324,16 +323,29 @@ void NodeManager::RegisterGcs() {
324323 auto on_node_change_subscribe_done = [this ](Status status) {
325324 RAY_CHECK_OK (status);
326325
327- // Register resource manager and scheduler
326+ // RESOURCE_VIEW is used to synchronize available resources across Raylets.
327+ //
328+ // LocalResourceManager::CreateSyncMessage will be called periodically to collect
329+ // the local Raylet's usage to broadcast to others (via the GCS). The updates are
330+ // versioned inside `LocalResourceManager` to avoid unnecessary broadcasts.
331+ //
332+ // NodeManager::ConsumeSyncMessage will be called when a sync message containing
333+ // other Raylets' resource usage is received.
328334 ray_syncer_.Register (
329335 /* message_type */ syncer::MessageType::RESOURCE_VIEW,
330336 /* reporter */ &cluster_resource_scheduler_.GetLocalResourceManager (),
331337 /* receiver */ this ,
332338 /* pull_from_reporter_interval_ms */
333339 report_resources_period_ms_);
334340
335- // Register a commands channel.
336- // It's only used for GC right now.
341+ // COMMANDS is used only to broadcast a global request to call the Python garbage
342+ // collector on all Raylets when the cluster is under memory pressure.
343+ //
344+ // Periodic collection is disabled, so this command is only broadcast via
345+ // `OnDemandBroadcasting` (which will call NodeManager::CreateSyncMessage).
346+ //
347+ // NodeManager::ConsumeSyncMessage is called to execute the GC command from other
348+ // Raylets.
337349 ray_syncer_.Register (
338350 /* message_type */ syncer::MessageType::COMMANDS,
339351 /* reporter */ this ,
@@ -348,6 +360,9 @@ void NodeManager::RegisterGcs() {
348360 // If plasma store is under high pressure, we should try to schedule a global
349361 // gc.
350362 if (triggered_by_global_gc) {
363+ // Always increment the sync message version number so that all GC commands
364+ // are sent unconditionally (never suppressed by version-based deduplication).
365+ gc_command_sync_version_++;
351366 ray_syncer_.OnDemandBroadcasting (syncer::MessageType::COMMANDS);
352367 }
353368 },
@@ -3034,19 +3049,25 @@ void NodeManager::ConsumeSyncMessage(
30343049
30353050std::optional<syncer::RaySyncMessage> NodeManager::CreateSyncMessage (
30363051 int64_t after_version, syncer::MessageType message_type) const {
3052+ // This method is only called for the COMMANDS channel, as the RESOURCE_VIEW
3053+ // channel goes through the LocalResourceManager.
30373054 RAY_CHECK_EQ (message_type, syncer::MessageType::COMMANDS);
30383055
3056+ // Serialize the COMMANDS message to a byte string to be nested inside the sync message.
3057+ std::string serialized_commands_sync_msg;
30393058 syncer::CommandsSyncMessage commands_sync_message;
30403059 commands_sync_message.set_should_global_gc (true );
30413060 commands_sync_message.set_cluster_full_of_actors_detected (resource_deadlock_warned_ >=
30423061 1 );
3062+ RAY_CHECK (commands_sync_message.SerializeToString (&serialized_commands_sync_msg));
3063+
3064+ // Populate the sync message.
30433065 syncer::RaySyncMessage msg;
3044- msg.set_version (absl::GetCurrentTimeNanos () );
3066+ msg.set_version (gc_command_sync_version_ );
30453067 msg.set_node_id (self_node_id_.Binary ());
30463068 msg.set_message_type (syncer::MessageType::COMMANDS);
3047- std::string serialized_msg;
3048- RAY_CHECK (commands_sync_message.SerializeToString (&serialized_msg));
3049- msg.set_sync_message (std::move (serialized_msg));
3069+ msg.set_sync_message (std::move (serialized_commands_sync_msg));
3070+
30503071 return std::make_optional (std::move (msg));
30513072}
30523073
0 commit comments