Skip to content

Commit 46bced4

Browse files
committed
initial test
1 parent 7fbf7ee commit 46bced4

File tree

1 file changed

+213
-0
lines changed

1 file changed

+213
-0
lines changed

exporter/pbm_collector_test.go

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@ package exporter
1717

1818
import (
1919
"context"
20+
"fmt"
21+
"runtime"
2022
"strings"
2123
"testing"
2224
"time"
2325

26+
"github.com/prometheus/client_golang/prometheus"
2427
"github.com/prometheus/client_golang/prometheus/testutil"
2528
"github.com/prometheus/common/promslog"
2629
"github.com/stretchr/testify/assert"
2730
"github.com/stretchr/testify/require"
31+
"go.mongodb.org/mongo-driver/bson"
32+
"go.mongodb.org/mongo-driver/mongo"
33+
"go.mongodb.org/mongo-driver/mongo/options"
34+
"go.mongodb.org/mongo-driver/mongo/readpref"
2835

2936
"github.com/percona/mongodb_exporter/internal/tu"
3037
)
@@ -64,3 +71,209 @@ func TestPBMCollector(t *testing.T) {
6471
assert.Equal(t, expectedLength, count, "PBM metrics are missing")
6572
})
6673
}
74+
75+
// TestPBMCollectorConnectionLeak verifies that the PBM collector does not leak
76+
// MongoDB connections or goroutines across multiple scrape cycles.
77+
//
78+
// This test monitors both server-side connections and client-side goroutines
79+
// to detect leaks. A leak in the PBM SDK's connect.MongoConnectWithOpts()
80+
// function (where Ping() failure doesn't call Disconnect()) would cause
81+
// goroutine growth even if server connections appear stable.
82+
//
83+
//nolint:paralleltest
84+
func TestPBMCollectorConnectionLeak(t *testing.T) {
85+
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
86+
defer cancel()
87+
88+
port, err := tu.PortForContainer("mongo-2-1")
89+
require.NoError(t, err)
90+
91+
mongoURI := fmt.Sprintf(
92+
"mongodb://admin:[email protected]:%s/?connectTimeoutMS=500&serverSelectionTimeoutMS=500",
93+
port,
94+
) //nolint:gosec
95+
96+
client, err := mongo.Connect(ctx, options.Client().ApplyURI(mongoURI))
97+
require.NoError(t, err)
98+
t.Cleanup(func() {
99+
client.Disconnect(ctx) //nolint:errcheck
100+
})
101+
err = client.Ping(ctx, nil)
102+
require.NoError(t, err)
103+
104+
// Allow GC and goroutines to stabilize before measuring
105+
runtime.GC()
106+
time.Sleep(100 * time.Millisecond)
107+
108+
initialConns := getServerConnectionCount(ctx, t, client)
109+
initialGoroutines := runtime.NumGoroutine()
110+
t.Logf("Initial state: connections=%d, goroutines=%d", initialConns, initialGoroutines)
111+
112+
c := newPbmCollector(ctx, client, mongoURI, promslog.New(&promslog.Config{}))
113+
114+
// Run multiple collection cycles
115+
const scrapeCount = 10
116+
for i := 0; i < scrapeCount; i++ {
117+
ch := make(chan prometheus.Metric, 100)
118+
c.Collect(ch)
119+
close(ch)
120+
for range ch {
121+
}
122+
}
123+
124+
// Allow time for cleanup and GC
125+
time.Sleep(1 * time.Second)
126+
runtime.GC()
127+
time.Sleep(100 * time.Millisecond)
128+
129+
finalConns := getServerConnectionCount(ctx, t, client)
130+
finalGoroutines := runtime.NumGoroutine()
131+
connGrowth := finalConns - initialConns
132+
goroutineGrowth := finalGoroutines - initialGoroutines
133+
134+
t.Logf("Final state: connections=%d (growth=%d), goroutines=%d (growth=%d) over %d scrapes",
135+
finalConns, connGrowth, finalGoroutines, goroutineGrowth, scrapeCount)
136+
137+
// Check for connection leaks
138+
// With a leak: expect ~3-4 connections per scrape (one per cluster member)
139+
// Without leak: should be near zero growth
140+
assert.Less(t, connGrowth, int64(scrapeCount),
141+
"Connection leak detected: server connections grew by %d over %d scrapes", connGrowth, scrapeCount)
142+
143+
// Check for goroutine leaks (more sensitive indicator)
144+
// Each leaked mongo.Client creates ~24 goroutines for monitoring
145+
// Allow some variance for GC timing, but flag major growth
146+
maxGoroutineGrowth := scrapeCount * 5 // Allow 5 goroutines per scrape as buffer
147+
assert.Less(t, goroutineGrowth, maxGoroutineGrowth,
148+
"Goroutine leak detected: grew by %d over %d scrapes (max allowed: %d). "+
149+
"This may indicate MongoDB clients are not being properly disconnected.",
150+
goroutineGrowth, scrapeCount, maxGoroutineGrowth)
151+
}
152+
153+
// TestMongoClientLeakOnPingFailure demonstrates the goroutine leak that occurs
154+
// when mongo.Connect succeeds but Ping fails, and Disconnect is not called.
155+
//
156+
// This test reproduces the bug in PBM SDK's connect.MongoConnectWithOpts()
157+
// where a failed Ping does not call Disconnect on the client, causing leaked
158+
// goroutines and connections.
159+
//
160+
// The test uses a ReadPreference with a non-existent tag to force Ping to fail
161+
// after the driver has already established monitoring connections.
162+
//
163+
// This test documents the leak behavior. It passes to show the leak exists.
164+
// After the PBM SDK fix, this test can be modified to verify the fix.
165+
//
166+
//nolint:paralleltest
167+
func TestMongoClientLeakOnPingFailure(t *testing.T) {
168+
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
169+
defer cancel()
170+
171+
port, err := tu.PortForContainer("mongo-2-1")
172+
require.NoError(t, err)
173+
174+
// Allow GC and goroutines to stabilize before measuring
175+
runtime.GC()
176+
time.Sleep(100 * time.Millisecond)
177+
initialGoroutines := runtime.NumGoroutine()
178+
179+
const iterations = 5
180+
leakedClients := make([]*mongo.Client, 0, iterations)
181+
182+
for i := 0; i < iterations; i++ {
183+
// Use a ReadPreference that can't be satisfied - this causes:
184+
// 1. Connect to succeed (driver starts monitoring goroutines)
185+
// 2. Ping to fail (no server matches the tag selector)
186+
opts := options.Client().
187+
ApplyURI(fmt.Sprintf("mongodb://admin:[email protected]:%s/", port)).
188+
SetReadPreference(readpref.Nearest(readpref.WithTags("dc", "nonexistent"))).
189+
SetServerSelectionTimeout(300 * time.Millisecond)
190+
191+
client, err := mongo.Connect(ctx, opts)
192+
require.NoError(t, err, "Connect should succeed")
193+
194+
// Give the driver time to establish monitoring connections
195+
time.Sleep(100 * time.Millisecond)
196+
197+
// Ping will fail because no server matches the read preference
198+
err = client.Ping(ctx, nil)
199+
require.Error(t, err, "Ping should fail due to unsatisfiable read preference")
200+
t.Logf("Iteration %d: Ping failed as expected: %v", i, err)
201+
202+
// Simulate the bug: NOT calling Disconnect after Ping failure
203+
// This is what happens in connect.MongoConnectWithOpts when Ping fails
204+
leakedClients = append(leakedClients, client)
205+
}
206+
207+
// Allow time for any cleanup and measure
208+
time.Sleep(500 * time.Millisecond)
209+
runtime.GC()
210+
time.Sleep(100 * time.Millisecond)
211+
212+
goroutinesAfterLeak := runtime.NumGoroutine()
213+
leakGrowth := goroutinesAfterLeak - initialGoroutines
214+
215+
t.Logf("After %d iterations without Disconnect: goroutines=%d (growth=%d)",
216+
iterations, goroutinesAfterLeak, leakGrowth)
217+
218+
// Each mongo.Client creates ~24 goroutines for monitoring
219+
// With 5 iterations, we expect ~120 leaked goroutines
220+
expectedLeakPerClient := 20 // Conservative estimate
221+
expectedTotalLeak := iterations * expectedLeakPerClient
222+
223+
// Document that the leak exists
224+
if leakGrowth >= expectedTotalLeak/2 {
225+
t.Logf("LEAK CONFIRMED: %d goroutines leaked over %d iterations (expected ~%d)",
226+
leakGrowth, iterations, expectedTotalLeak)
227+
}
228+
229+
// Now properly disconnect all clients and verify cleanup works
230+
for _, client := range leakedClients {
231+
client.Disconnect(ctx) //nolint:errcheck
232+
}
233+
234+
time.Sleep(500 * time.Millisecond)
235+
runtime.GC()
236+
time.Sleep(100 * time.Millisecond)
237+
238+
goroutinesAfterCleanup := runtime.NumGoroutine()
239+
cleanupGrowth := goroutinesAfterCleanup - initialGoroutines
240+
241+
t.Logf("After Disconnect cleanup: goroutines=%d (growth from initial=%d)",
242+
goroutinesAfterCleanup, cleanupGrowth)
243+
244+
// After proper cleanup, goroutine count should return to near initial
245+
// This verifies that Disconnect() properly cleans up resources
246+
assert.Less(t, cleanupGrowth, 10,
247+
"Goroutines should return to near initial after Disconnect, but grew by %d", cleanupGrowth)
248+
249+
// Document the leak - this assertion verifies the bug exists
250+
// When NOT calling Disconnect after Ping failure, goroutines leak
251+
assert.Greater(t, leakGrowth, expectedTotalLeak/2,
252+
"Expected goroutine leak when Disconnect is not called after Ping failure. "+
253+
"Growth was %d, expected at least %d. "+
254+
"This documents the bug in PBM SDK's connect.MongoConnectWithOpts()",
255+
leakGrowth, expectedTotalLeak/2)
256+
}
257+
258+
// getServerConnectionCount returns the current number of connections to the MongoDB server.
259+
func getServerConnectionCount(ctx context.Context, t *testing.T, client *mongo.Client) int64 {
260+
t.Helper()
261+
262+
var result bson.M
263+
err := client.Database("admin").RunCommand(ctx, bson.D{{Key: "serverStatus", Value: 1}}).Decode(&result)
264+
require.NoError(t, err)
265+
266+
connections, ok := result["connections"].(bson.M)
267+
require.True(t, ok, "serverStatus should contain connections field")
268+
269+
current, ok := connections["current"].(int32)
270+
if ok {
271+
return int64(current)
272+
}
273+
274+
// Try int64 in case MongoDB returns it differently
275+
current64, ok := connections["current"].(int64)
276+
require.True(t, ok, "connections.current should be numeric")
277+
278+
return current64
279+
}

0 commit comments

Comments
 (0)