Skip to content

Commit 62f541a

Browse files
committed
roachprod: fix service registry for unregistered services
As of #125393, we no longer register all virtual clusters with DNS. This was an optimization made to reduce unnecessary DNS usage when there was only one tenant using the default ports. Specifically, we no longer register shared process tenants and system tenants using the default ports. However, some of the existing logic still assumes that registration happens for all virtual clusters. For example, clusterSynced.Stop errors out if a shared process virtual cluster is not registered, even though this is now the expected behavior. This change establishes that DiscoverService only attempts to find services, while ServiceDescriptor will always return the service descriptor using fallback logic if needed.
1 parent c5c00d9 commit 62f541a

File tree

6 files changed

+220
-202
lines changed

6 files changed

+220
-202
lines changed

pkg/roachprod/install/cluster_synced.go

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -464,31 +464,24 @@ func (c *SyncedCluster) Stop(
464464
// killProcesses indicates whether processed need to be stopped.
465465
killProcesses := true
466466

467-
// Non system shared process virtual clusters don't get killed but are stopped via SQL.
468-
// Figure out if the virtual cluster is one or not.
467+
// For shared process secondary tenants, we just stop the service via SQL.
468+
// Find out of this is a shared process secondary tenant.
469469
if virtualClusterLabel != "" {
470470
name, sqlInstance, err := VirtualClusterInfoFromLabel(virtualClusterLabel)
471471
if err != nil {
472472
return err
473473
}
474474

475-
if name != SystemInterfaceName {
476-
services, err := c.DiscoverServices(ctx, name, ServiceTypeSQL)
475+
if !IsSystemInterface(name) {
476+
isExternal, err := c.IsExternalService(ctx, name)
477477
if err != nil {
478478
return err
479479
}
480480

481-
if len(services) == 0 {
482-
return fmt.Errorf("no service for virtual cluster %q", virtualClusterName)
483-
}
484-
485-
virtualClusterName = name
486-
if services[0].ServiceMode == ServiceModeShared {
487-
// For shared process virtual clusters, we just stop the service
488-
// via SQL.
489-
killProcesses = false
490-
} else {
481+
if isExternal {
491482
virtualClusterDisplay = fmt.Sprintf(" virtual cluster %q, instance %d", virtualClusterName, sqlInstance)
483+
} else {
484+
killProcesses = false
492485
}
493486
}
494487
}
@@ -2268,7 +2261,7 @@ func (c *SyncedCluster) pgurls(
22682261
}
22692262
m := make(map[Node]string, len(hosts))
22702263
for node, host := range hosts {
2271-
desc, err := c.DiscoverService(ctx, node, virtualClusterName, ServiceTypeSQL, sqlInstance)
2264+
desc, err := c.ServiceDescriptor(ctx, node, virtualClusterName, ServiceTypeSQL, sqlInstance)
22722265
if err != nil {
22732266
return nil, err
22742267
}
@@ -2303,24 +2296,19 @@ func (c *SyncedCluster) loadBalancerURL(
23032296
sqlInstance int,
23042297
auth PGAuthMode,
23052298
) (string, error) {
2306-
services, err := c.DiscoverServices(ctx, virtualClusterName, ServiceTypeSQL)
2299+
// Note that it's possible for our service to not be running on the entire roachprod
2300+
// cluster, e.g. one of the nodes is a workload node, or we have a separate process
2301+
// virtual cluster running on a subset of nodes. We must search for our service on
2302+
// the entire cluster.
2303+
descs, err := c.ServiceDescriptors(ctx, c.Nodes, virtualClusterName, ServiceTypeSQL, sqlInstance)
23072304
if err != nil {
23082305
return "", err
23092306
}
2310-
port := config.DefaultSQLPort
2311-
serviceMode := ServiceModeExternal
2312-
for _, service := range services {
2313-
if service.VirtualClusterName == virtualClusterName && service.Instance == sqlInstance {
2314-
serviceMode = service.ServiceMode
2315-
port = service.Port
2316-
break
2317-
}
2318-
}
2319-
address, err := c.FindLoadBalancer(l, port)
2307+
address, err := c.FindLoadBalancer(l, descs[0].Port)
23202308
if err != nil {
23212309
return "", err
23222310
}
2323-
loadBalancerURL := c.NodeURL(address.IP, address.Port, virtualClusterName, serviceMode, auth, "" /* database */)
2311+
loadBalancerURL := c.NodeURL(address.IP, address.Port, virtualClusterName, descs[0].ServiceMode, auth, "" /* database */)
23242312
return loadBalancerURL, nil
23252313
}
23262314

@@ -2725,7 +2713,7 @@ func (c *SyncedCluster) Reset(l *logger.Logger) error {
27252713
return nil
27262714
}
27272715

2728-
nodes := c.TargetNodes()
2716+
nodes := c.Nodes
27292717
targetVMs := make(vm.List, len(nodes))
27302718
for idx, node := range nodes {
27312719
targetVMs[idx] = c.VMs[node-1]

0 commit comments

Comments
 (0)