Skip to content

Commit 5249b87

Browse files
Fix data race in host infrastructure proxy context map
This commit fixes a data race in the host infrastructure where proxyContextMap was being accessed concurrently without synchronization. The race occurred between: - runEnvoy() writing to the map (line 92) - Close() reading from the map (line 37) - stopEnvoy() reading and deleting from the map (lines 125, 129) Added a sync.RWMutex to protect all accesses to proxyContextMap: - Use RLock for reads in Close() and CreateOrUpdateProxyInfra() - Use Lock for writes in runEnvoy() and stopEnvoy() - Extract map keys before iteration in Close() to avoid holding the lock while calling stopEnvoy() This ensures thread-safe access to the proxy context map during concurrent infrastructure operations. Signed-off-by: Adrian Cole <[email protected]>
1 parent 9155924 commit 5249b87

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

internal/infrastructure/host/infra.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"io"
1212
"os"
1313
"path/filepath"
14+
"sync"
1415

1516
egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
1617
"github.com/envoyproxy/gateway/internal/envoygateway/config"
@@ -46,6 +47,8 @@ type Infra struct {
4647

4748
// proxyContextMap store the context of each running proxy by its name for lifecycle management.
4849
proxyContextMap map[string]*proxyContext
50+
// mu protects proxyContextMap from concurrent access.
51+
mu sync.RWMutex
4952

5053
// TODO: remove this field once it supports the configurable homeDir
5154
sdsConfigPath string

internal/infrastructure/host/proxy_infra.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ type proxyContext struct {
3434

3535
// Close implements the Manager interface.
3636
func (i *Infra) Close() error {
37+
i.mu.RLock()
38+
names := make([]string, 0, len(i.proxyContextMap))
3739
for name := range i.proxyContextMap {
40+
names = append(names, name)
41+
}
42+
i.mu.RUnlock()
43+
44+
for _, name := range names {
3845
i.stopEnvoy(name)
3946
}
4047
return nil
@@ -53,7 +60,10 @@ func (i *Infra) CreateOrUpdateProxyInfra(ctx context.Context, infra *ir.Infra) e
5360
proxyInfra := infra.GetProxyInfra()
5461
proxyName := utils.GetHashedName(proxyInfra.Name, 64)
5562
// Return directly if the proxy is running.
56-
if _, ok := i.proxyContextMap[proxyName]; ok {
63+
i.mu.RLock()
64+
_, running := i.proxyContextMap[proxyName]
65+
i.mu.RUnlock()
66+
if running {
5767
return nil
5868
}
5969

@@ -89,7 +99,9 @@ func (i *Infra) CreateOrUpdateProxyInfra(ctx context.Context, infra *ir.Infra) e
8999
func (i *Infra) runEnvoy(ctx context.Context, envoyVersion, name string, args []string) {
90100
pCtx, cancel := context.WithCancel(ctx)
91101
exit := make(chan struct{}, 1)
102+
i.mu.Lock()
92103
i.proxyContextMap[name] = &proxyContext{cancel: cancel, exit: exit}
104+
i.mu.Unlock()
93105
go func() {
94106
// Run blocks until pCtx is done or the process exits where the latter doesn't happen when
95107
// Envoy successfully starts up. So, this will not return until pCtx is done in practice.
@@ -122,11 +134,18 @@ func (i *Infra) DeleteProxyInfra(_ context.Context, infra *ir.Infra) error {
122134

123135
// stopEnvoy stops the Envoy process by its name. It will block until the process completely stopped.
124136
func (i *Infra) stopEnvoy(proxyName string) {
125-
if pCtx, ok := i.proxyContextMap[proxyName]; ok {
137+
i.mu.Lock()
138+
pCtx, ok := i.proxyContextMap[proxyName]
139+
i.mu.Unlock()
140+
141+
if ok {
126142
pCtx.cancel() // Cancel causes the Envoy process to exit.
127143
<-pCtx.exit // Wait for the Envoy process to completely exit.
128144
close(pCtx.exit) // Close the channel to avoid leaking.
145+
146+
i.mu.Lock()
129147
delete(i.proxyContextMap, proxyName)
148+
i.mu.Unlock()
130149
}
131150
}
132151

0 commit comments

Comments
 (0)