Skip to content

Commit f86d90b

Browse files
committed
smoke tests
1 parent 46bced4 commit f86d90b

File tree

1 file changed

+61
-107
lines changed

1 file changed

+61
-107
lines changed

exporter/pbm_collector_test.go

Lines changed: 61 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ package exporter
1818
import (
1919
"context"
2020
"fmt"
21-
"runtime"
2221
"strings"
2322
"testing"
2423
"time"
2524

25+
pbmconnect "github.com/percona/percona-backup-mongodb/pbm/connect"
2626
"github.com/prometheus/client_golang/prometheus"
2727
"github.com/prometheus/client_golang/prometheus/testutil"
2828
"github.com/prometheus/common/promslog"
@@ -73,12 +73,7 @@ func TestPBMCollector(t *testing.T) {
7373
}
7474

7575
// TestPBMCollectorConnectionLeak verifies that the PBM collector does not leak
76-
// MongoDB connections or goroutines across multiple scrape cycles.
77-
//
78-
// This test monitors both server-side connections and client-side goroutines
79-
// to detect leaks. A leak in the PBM SDK's connect.MongoConnectWithOpts()
80-
// function (where Ping() failure doesn't call Disconnect()) would cause
81-
// goroutine growth even if server connections appear stable.
76+
// MongoDB connections across multiple scrape cycles.
8277
//
8378
//nolint:paralleltest
8479
func TestPBMCollectorConnectionLeak(t *testing.T) {
@@ -101,13 +96,11 @@ func TestPBMCollectorConnectionLeak(t *testing.T) {
10196
err = client.Ping(ctx, nil)
10297
require.NoError(t, err)
10398

104-
// Allow GC and goroutines to stabilize before measuring
105-
runtime.GC()
99+
// Allow connections to stabilize
106100
time.Sleep(100 * time.Millisecond)
107101

108102
initialConns := getServerConnectionCount(ctx, t, client)
109-
initialGoroutines := runtime.NumGoroutine()
110-
t.Logf("Initial state: connections=%d, goroutines=%d", initialConns, initialGoroutines)
103+
t.Logf("Initial connections: %d", initialConns)
111104

112105
c := newPbmCollector(ctx, client, mongoURI, promslog.New(&promslog.Config{}))
113106

@@ -121,138 +114,99 @@ func TestPBMCollectorConnectionLeak(t *testing.T) {
121114
}
122115
}
123116

124-
// Allow time for cleanup and GC
117+
// Allow time for cleanup
125118
time.Sleep(1 * time.Second)
126-
runtime.GC()
127-
time.Sleep(100 * time.Millisecond)
128119

129120
finalConns := getServerConnectionCount(ctx, t, client)
130-
finalGoroutines := runtime.NumGoroutine()
131121
connGrowth := finalConns - initialConns
132-
goroutineGrowth := finalGoroutines - initialGoroutines
133122

134-
t.Logf("Final state: connections=%d (growth=%d), goroutines=%d (growth=%d) over %d scrapes",
135-
finalConns, connGrowth, finalGoroutines, goroutineGrowth, scrapeCount)
123+
t.Logf("Final connections: %d (growth=%d over %d scrapes)", finalConns, connGrowth, scrapeCount)
136124

137-
// Check for connection leaks
138125
// With a leak: expect ~3-4 connections per scrape (one per cluster member)
139126
// Without leak: should be near zero growth
140127
assert.Less(t, connGrowth, int64(scrapeCount),
141128
"Connection leak detected: server connections grew by %d over %d scrapes", connGrowth, scrapeCount)
142-
143-
// Check for goroutine leaks (more sensitive indicator)
144-
// Each leaked mongo.Client creates ~24 goroutines for monitoring
145-
// Allow some variance for GC timing, but flag major growth
146-
maxGoroutineGrowth := scrapeCount * 5 // Allow 5 goroutines per scrape as buffer
147-
assert.Less(t, goroutineGrowth, maxGoroutineGrowth,
148-
"Goroutine leak detected: grew by %d over %d scrapes (max allowed: %d). "+
149-
"This may indicate MongoDB clients are not being properly disconnected.",
150-
goroutineGrowth, scrapeCount, maxGoroutineGrowth)
151129
}
152130

153-
// TestMongoClientLeakOnPingFailure demonstrates the goroutine leak that occurs
154-
// when mongo.Connect succeeds but Ping fails, and Disconnect is not called.
131+
// TestPBMSDKConnectionLeakOnPingFailure tests the PBM SDK's connect.MongoConnect
132+
// for connection leaks when Ping fails after Connect succeeds.
155133
//
156-
// This test reproduces the bug in PBM SDK's connect.MongoConnectWithOpts()
157-
// where a failed Ping does not call Disconnect on the client, causing leaked
158-
// goroutines and connections.
134+
// The bug in PBM SDK's connect.MongoConnectWithOpts():
135+
// - mongo.Connect() succeeds (establishes connections)
136+
// - Ping() fails (e.g., unreachable server, wrong replica set)
137+
// - Connection is NOT disconnected -> connections leak
159138
//
160-
// The test uses a ReadPreference with a non-existent tag to force Ping to fail
161-
// after the driver has already established monitoring connections.
162-
//
163-
// This test documents the leak behavior. It passes to show the leak exists.
164-
// After the PBM SDK fix, this test can be modified to verify the fix.
139+
// This test uses an unsatisfiable ReadPreference to trigger Ping failure
140+
// after the driver has established connections.
165141
//
166142
//nolint:paralleltest
167-
func TestMongoClientLeakOnPingFailure(t *testing.T) {
143+
func TestPBMSDKConnectionLeakOnPingFailure(t *testing.T) {
168144
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
169145
defer cancel()
170146

171147
port, err := tu.PortForContainer("mongo-2-1")
172148
require.NoError(t, err)
173149

174-
// Allow GC and goroutines to stabilize before measuring
175-
runtime.GC()
150+
// Create a monitoring client to check server connection count
151+
monitorURI := fmt.Sprintf("mongodb://admin:[email protected]:%s/", port) //nolint:gosec
152+
monitorClient, err := mongo.Connect(ctx, options.Client().ApplyURI(monitorURI))
153+
require.NoError(t, err)
154+
t.Cleanup(func() {
155+
monitorClient.Disconnect(ctx) //nolint:errcheck
156+
})
157+
err = monitorClient.Ping(ctx, nil)
158+
require.NoError(t, err)
159+
160+
// Allow connections to stabilize
176161
time.Sleep(100 * time.Millisecond)
177-
initialGoroutines := runtime.NumGoroutine()
162+
163+
initialConns := getServerConnectionCount(ctx, t, monitorClient)
164+
t.Logf("Initial connections: %d", initialConns)
178165

179166
const iterations = 5
180-
leakedClients := make([]*mongo.Client, 0, iterations)
181167

182168
for i := 0; i < iterations; i++ {
183-
// Use a ReadPreference that can't be satisfied - this causes:
184-
// 1. Connect to succeed (driver starts monitoring goroutines)
185-
// 2. Ping to fail (no server matches the tag selector)
186-
opts := options.Client().
187-
ApplyURI(fmt.Sprintf("mongodb://admin:[email protected]:%s/", port)).
188-
SetReadPreference(readpref.Nearest(readpref.WithTags("dc", "nonexistent"))).
189-
SetServerSelectionTimeout(300 * time.Millisecond)
190-
191-
client, err := mongo.Connect(ctx, opts)
192-
require.NoError(t, err, "Connect should succeed")
193-
194-
// Give the driver time to establish monitoring connections
195-
time.Sleep(100 * time.Millisecond)
196-
197-
// Ping will fail because no server matches the read preference
198-
err = client.Ping(ctx, nil)
199-
require.Error(t, err, "Ping should fail due to unsatisfiable read preference")
200-
t.Logf("Iteration %d: Ping failed as expected: %v", i, err)
201-
202-
// Simulate the bug: NOT calling Disconnect after Ping failure
203-
// This is what happens in connect.MongoConnectWithOpts when Ping fails
204-
leakedClients = append(leakedClients, client)
169+
// Use a ReadPreference that cannot be satisfied to trigger the leak:
170+
// 1. mongo.Connect() succeeds and establishes connections
171+
// 2. Ping() fails because no server matches the tag selector
172+
// 3. Without proper cleanup, connections are leaked
173+
_, err := pbmconnect.MongoConnect(ctx,
174+
monitorURI,
175+
pbmconnect.AppName("leak-test"),
176+
func(opts *options.ClientOptions) error {
177+
opts.SetReadPreference(readpref.Nearest(readpref.WithTags("dc", "nonexistent")))
178+
opts.SetServerSelectionTimeout(300 * time.Millisecond)
179+
return nil
180+
},
181+
)
182+
require.Error(t, err, "MongoConnect should fail due to unsatisfiable ReadPreference")
183+
t.Logf("Iteration %d: MongoConnect failed as expected", i)
205184
}
206185

207-
// Allow time for any cleanup and measure
186+
// Allow time for any cleanup
208187
time.Sleep(500 * time.Millisecond)
209-
runtime.GC()
210-
time.Sleep(100 * time.Millisecond)
211188

212-
goroutinesAfterLeak := runtime.NumGoroutine()
213-
leakGrowth := goroutinesAfterLeak - initialGoroutines
189+
finalConns := getServerConnectionCount(ctx, t, monitorClient)
190+
connGrowth := finalConns - initialConns
214191

215-
t.Logf("After %d iterations without Disconnect: goroutines=%d (growth=%d)",
216-
iterations, goroutinesAfterLeak, leakGrowth)
192+
t.Logf("Final connections: %d (growth=%d over %d iterations)", finalConns, connGrowth, iterations)
217193

218-
// Each mongo.Client creates ~24 goroutines for monitoring
219-
// With 5 iterations, we expect ~120 leaked goroutines
220-
expectedLeakPerClient := 20 // Conservative estimate
221-
expectedTotalLeak := iterations * expectedLeakPerClient
194+
// Each leaked connection attempt should leave connections open
195+
// If there's a leak, we'd see growth proportional to iterations
196+
// Without leak, growth should be zero or minimal
197+
leakThreshold := int64(iterations * 2) // Allow some buffer
222198

223-
// Document that the leak exists
224-
if leakGrowth >= expectedTotalLeak/2 {
225-
t.Logf("LEAK CONFIRMED: %d goroutines leaked over %d iterations (expected ~%d)",
226-
leakGrowth, iterations, expectedTotalLeak)
199+
if connGrowth >= leakThreshold {
200+
t.Logf("LEAK DETECTED: %d connections leaked over %d iterations", connGrowth, iterations)
201+
t.Logf("This confirms the bug in PBM SDK's connect.MongoConnectWithOpts()")
227202
}
228203

229-
// Now properly disconnect all clients and verify cleanup works
230-
for _, client := range leakedClients {
231-
client.Disconnect(ctx) //nolint:errcheck
232-
}
233-
234-
time.Sleep(500 * time.Millisecond)
235-
runtime.GC()
236-
time.Sleep(100 * time.Millisecond)
237-
238-
goroutinesAfterCleanup := runtime.NumGoroutine()
239-
cleanupGrowth := goroutinesAfterCleanup - initialGoroutines
240-
241-
t.Logf("After Disconnect cleanup: goroutines=%d (growth from initial=%d)",
242-
goroutinesAfterCleanup, cleanupGrowth)
243-
244-
// After proper cleanup, goroutine count should return to near initial
245-
// This verifies that Disconnect() properly cleans up resources
246-
assert.Less(t, cleanupGrowth, 10,
247-
"Goroutines should return to near initial after Disconnect, but grew by %d", cleanupGrowth)
248-
249-
// Document the leak - this assertion verifies the bug exists
250-
// When NOT calling Disconnect after Ping failure, goroutines leak
251-
assert.Greater(t, leakGrowth, expectedTotalLeak/2,
252-
"Expected goroutine leak when Disconnect is not called after Ping failure. "+
253-
"Growth was %d, expected at least %d. "+
254-
"This documents the bug in PBM SDK's connect.MongoConnectWithOpts()",
255-
leakGrowth, expectedTotalLeak/2)
204+
// This test documents the leak. Once the PBM SDK is fixed, flip this assertion:
205+
// assert.Less(t, connGrowth, leakThreshold, "Connection leak in PBM SDK")
206+
assert.GreaterOrEqual(t, connGrowth, leakThreshold,
207+
"Expected connection leak when Ping fails. Growth was %d, expected at least %d. "+
208+
"If this fails, the PBM SDK may have been fixed!",
209+
connGrowth, leakThreshold)
256210
}
257211

258212
// getServerConnectionCount returns the current number of connections to the MongoDB server.

0 commit comments

Comments
 (0)