Skip to content

Commit 257e2e6

Browse files
authored
Check what ports gateways are listening on and filter based on that (#58131)
* Check what ports gateways are listening on and filter based on that TestTraffic tests are failing when we run them in ambient multi-network environment. Looking through the failure details it seems that we overlooked interoperability between ambient and sidecar. Specifically, when in the same namespace we have both pods/services using ambient dataplane and pods/services using sidecar at the same time. In multi-network scenario, what can happen is that sidecars will see ambient E/W gateways, because pilot generates EDS endpoints for those, but at the moment sidecars cannot talke to ambient E/W gateways because those expect double-HBONE. We want to ultimately allow sidecars to talk to ambient E/W gateways using double-HBONE (or add support for mTLS to ambient E/W gateways), but it's a bigger project. So for now I'm trying to filter gateways that expect double-HBONE from sidecar endpoints. There are a few options to achieve that that I considered: 1. [not exactly a fix] Use istio.test.ambient.everywhere flag for tests - it avoids the issue, but it also defeats one of the reasons for running those tests in the first place 2. [non invasive] check if gw has a non-zero HBONE port and if so exclude it from gws used by sidecar - this solves the problem and is probably the least risky change 3. [proper, I think] When generating gateways check what ports they are actually listening on and when generating EDS endpoints account for that (similarly to how we would take into account HBONE port in the option 2 above). I ultimately decided to go for the last option. Option 1 is not exactly a solution, so I discarded it for that reason. Option 2 works, but it introduces an assumption that a GW is either ambient and listens on HBONE port or a mTLS, but not both - this is true now (and probably long into the future), but it does not have to be. Another thing to mention is that it creates a bit of assymetry between how HBONE port and mTLS port are treated in the code - I personally don't like that. And the last item in favor of the option 3 is that there is a TODO in the code that suggests that we should at least consider checking what ports the gw service is actually listening on. Going for option 3, assuming we don't find any downsides, is that we can close that TODO as well. So this change adds a check to the code that generates gateways from services to confirm that the service actually listens on the port we intend to use. If the service does not listen on the port, I mark it as 0, so upstream code can check for that. Then in the EDS endpoint generation code I check if we are generating for a sidecar and if so, I ignore gateways that don't listen on mTLS port (by checking if it's 0 or not). Signed-off-by: Mikhail Krinkin <[email protected]> * Fix the relevant tests Signed-off-by: Mikhail Krinkin <[email protected]> * Fix formatting Signed-off-by: Mikhail Krinkin <[email protected]> * Add a release note for the fix Signed-off-by: Mikhail Krinkin <[email protected]> --------- Signed-off-by: Mikhail Krinkin <[email protected]>
1 parent e4acd9f commit 257e2e6

File tree

5 files changed

+57
-12
lines changed

5 files changed

+57
-12
lines changed

pilot/pkg/serviceregistry/kube/controller/network.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -312,28 +312,43 @@ func (n *networkManager) extractGatewaysInner(svc *model.Service) bool {
312312

313313
// getGatewayDetails returns gateways without the address populated, only the network and (unmapped) port for a given service.
314314
func (n *networkManager) getGatewayDetails(svc *model.Service) []model.NetworkGateway {
315-
// TODO should we start checking if svc's Ports contain the gateway port?
316-
317315
// We have different types of E/W gateways - those that use mTLS (those are used in sidecar mode when talking cross networks)
318316
// and those that use double-HBONE (those are used in ambient mode when talking cross cluster). A gateway service may or may
319317
// not listen on the mTLS (15443, by default) or HBONE (15008) ports, depending on the mode of operation used by the mesh
320318
// in the remote cluster. We should not use gateways that don't really listen on the right port.
321-
hbonePort := uint32(DefaultNetworkGatewayHBONEPort)
322-
if _, exists := svc.Ports.GetByPort(DefaultNetworkGatewayHBONEPort); !exists {
323-
hbonePort = 0
324-
}
325319

326320
// label based gateways
327321
// TODO label based gateways could support being the gateway for multiple networks
328322
if nw := svc.Attributes.Labels[label.TopologyNetwork.Name]; nw != "" {
323+
hbonePort := DefaultNetworkGatewayHBONEPort
324+
gwPort := DefaultNetworkGatewayPort
325+
329326
if gwPortStr := svc.Attributes.Labels[label.NetworkingGatewayPort.Name]; gwPortStr != "" {
330-
if gwPort, err := strconv.Atoi(gwPortStr); err == nil {
331-
return []model.NetworkGateway{{Port: uint32(gwPort), HBONEPort: hbonePort, Network: network.ID(nw)}}
327+
port, err := strconv.Atoi(gwPortStr)
328+
if err != nil {
329+
log.Warnf("could not parse %q for %s on %s/%s; defaulting to %d",
330+
gwPortStr, label.NetworkingGatewayPort.Name, svc.Attributes.Namespace, svc.Attributes.Name, DefaultNetworkGatewayPort)
331+
} else {
332+
gwPort = port
332333
}
333-
log.Warnf("could not parse %q for %s on %s/%s; defaulting to %d",
334-
gwPortStr, label.NetworkingGatewayPort.Name, svc.Attributes.Namespace, svc.Attributes.Name, DefaultNetworkGatewayPort)
335334
}
336-
return []model.NetworkGateway{{Port: DefaultNetworkGatewayPort, HBONEPort: hbonePort, Network: network.ID(nw)}}
335+
336+
_, acceptMTLS := svc.Ports.GetByPort(gwPort)
337+
_, acceptHBONE := svc.Ports.GetByPort(hbonePort)
338+
339+
if !acceptMTLS && !acceptHBONE {
340+
log.Warnf("service %s/%s is labeled as gateway, but does not listen neither on port %d nor on port %d",
341+
svc.Attributes.Namespace, svc.Attributes.Name, gwPort, hbonePort)
342+
return nil
343+
}
344+
345+
if !acceptMTLS {
346+
gwPort = 0
347+
}
348+
if !acceptHBONE {
349+
hbonePort = 0
350+
}
351+
return []model.NetworkGateway{{Port: uint32(gwPort), HBONEPort: uint32(hbonePort), Network: network.ID(nw)}}
337352
}
338353

339354
// meshNetworks registryServiceName+fromRegistry

pilot/pkg/xds/endpoints/endpoint_builder.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,3 +927,7 @@ func isWaypointProxy(node *model.Proxy) bool {
927927

928928
return isManagedGateway && controller == constants.ManagedGatewayMeshControllerLabel
929929
}
930+
931+
func isSidecarProxy(node *model.Proxy) bool {
932+
return node != nil && node.Type == model.SidecarProxy
933+
}

pilot/pkg/xds/endpoints/ep_filters.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,20 @@ func (b *EndpointBuilder) selectNetworkGateways(nw network.ID, c cluster.ID) []m
288288
return ambientGws
289289
}
290290

291+
// Sidecar proxies cannot talk to ambient E/W gateway for now, so when we see an ambient
292+
// E/W gateway (e.g., a gateway that listens on hbone port, but does not have an mTLS port
293+
// we filter it out.
294+
if isSidecarProxy(b.proxy) {
295+
var sidecarGws []model.NetworkGateway
296+
for _, gw := range gws {
297+
if gw.Port == 0 {
298+
continue
299+
}
300+
sidecarGws = append(sidecarGws, gw)
301+
}
302+
return sidecarGws
303+
}
304+
291305
return gws
292306
}
293307

pilot/pkg/xds/mesh_network_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ func TestMeshNetworking(t *testing.T) {
191191
Spec: corev1.ServiceSpec{
192192
Type: corev1.ServiceTypeClusterIP,
193193
ExternalIPs: []string{"3.3.3.3"},
194+
Ports: []corev1.ServicePort{{Port: 15443}},
194195
},
195196
}},
196197
},
@@ -574,7 +575,10 @@ func gatewaySvc(name, ip, network string) *corev1.Service {
574575
Namespace: "istio-system",
575576
Labels: map[string]string{label.TopologyNetwork.Name: network},
576577
},
577-
Spec: corev1.ServiceSpec{Type: corev1.ServiceTypeLoadBalancer},
578+
Spec: corev1.ServiceSpec{
579+
Type: corev1.ServiceTypeLoadBalancer,
580+
Ports: []corev1.ServicePort{{Port: 15443}},
581+
},
578582
Status: corev1.ServiceStatus{
579583
LoadBalancer: corev1.LoadBalancerStatus{Ingress: []corev1.LoadBalancerIngress{{IP: ip}}},
580584
},

releasenotes/notes/57878.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: release-notes/v2
2+
kind: bug-fix
3+
area: traffic-management
4+
issue:
5+
- 57878
6+
releaseNotes:
7+
- |
8+
**Fixed** the issue when sidecars try to route requests to ambient E/W gateways incorrectly.

0 commit comments

Comments
 (0)