Skip to content

Commit 9e002a6

Browse files
authored
Pool gzip writers to reduce RabbitMQ message compression allocations (#3103)
* use sync.Pool for gzip writers to avoid memory hogging * comments * fix deadlock failure * copilot suggestion * fix formatting * use asserts * PR comments
1 parent c39743a commit 9e002a6

File tree

4 files changed

+295
-21
lines changed

4 files changed

+295
-21
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
name: go-benchmarks

on:
  pull_request:
    paths:
      - "**.go"
      - "go.mod"
      - "go.sum"

concurrency:
  group: go-benchmarks-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  benchmark:
    runs-on: ubicloud-standard-8
    timeout-minutes: 15
    permissions:
      pull-requests: write
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

      - name: Setup Go
        uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
        with:
          go-version: "1.25"

      - name: Install benchstat
        run: go install golang.org/x/perf/cmd/benchstat@latest

      - name: Find packages with benchmark tests
        id: packages
        run: |
          # Find Go packages that contain Benchmark functions in _test.go files
          # touched by this PR (diffed against the base SHA).
          CHANGED_DIRS=$(git diff --name-only ${{ github.event.pull_request.base.sha }}..HEAD -- '*.go' | xargs -I{} dirname {} | sort -u)
          BENCH_PKGS=""
          for dir in $CHANGED_DIRS; do
            if grep -rq '^func Benchmark' "$dir"/*_test.go 2>/dev/null; then
              BENCH_PKGS="$BENCH_PKGS ./$dir"
            fi
          done
          if [ -z "$BENCH_PKGS" ]; then
            echo "found=false" >> "$GITHUB_OUTPUT"
          else
            echo "found=true" >> "$GITHUB_OUTPUT"
            echo "pkgs=$BENCH_PKGS" >> "$GITHUB_OUTPUT"
          fi

      - name: Run benchmarks on PR branch
        if: steps.packages.outputs.found == 'true'
        run: go test -run='^$' -bench=. -benchmem -count=6 -timeout=10m ${{ steps.packages.outputs.pkgs }} 2>/dev/null | tee /tmp/new.txt

      - name: Run benchmarks on base branch
        if: steps.packages.outputs.found == 'true'
        run: |
          git checkout ${{ github.event.pull_request.base.sha }}
          go test -run='^$' -bench=. -benchmem -count=6 -timeout=10m ${{ steps.packages.outputs.pkgs }} 2>/dev/null | tee /tmp/old.txt
          git checkout ${{ github.event.pull_request.head.sha }}

      - name: Compare with benchstat
        if: steps.packages.outputs.found == 'true'
        id: benchstat
        run: |
          RESULT=$(benchstat /tmp/old.txt /tmp/new.txt 2>&1 || true)
          if [ -z "$RESULT" ]; then
            echo "empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "empty=false" >> "$GITHUB_OUTPUT"
            {
              echo "result<<BENCHSTAT_EOF"
              echo "$RESULT"
              echo "BENCHSTAT_EOF"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Post results as PR comment
        if: steps.packages.outputs.found == 'true' && steps.benchstat.outputs.empty == 'false'
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        # SECURITY: pass untrusted values through the environment instead of
        # interpolating `${{ ... }}` straight into the script body. Benchstat
        # output contains benchmark names defined in PR-controlled code (and a
        # branch name is attacker-chosen too), so direct interpolation into a
        # JS template literal would allow script injection with the
        # `pull-requests: write` token.
        env:
          BENCHSTAT_RESULT: ${{ steps.benchstat.outputs.result }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        with:
          script: |
            const result = process.env.BENCHSTAT_RESULT;
            const lines = [
              '## Benchmark results',
              '',
              '```',
              result.trim(),
              '```',
              '',
              `<sub>Compared against \`${process.env.BASE_REF}\` (${process.env.BASE_SHA.slice(0, 8)})</sub>`,
            ];
            const body = lines.join('\n');

            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const existing = comments.find(c =>
              c.user.type === 'Bot' && c.body.startsWith('## Benchmark results')
            );

            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body,
              });
            }

.github/workflows/test.yml

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -547,32 +547,51 @@ jobs:
547547
- name: Add go-deadlock dependency
548548
run: go get github.com/sasha-s/go-deadlock@v0.3.6
549549

550-
- name: Patch sync imports to use go-deadlock (sed)
550+
- name: Patch sync.Mutex/RWMutex to use go-deadlock
551551
shell: bash
552552
run: |
553553
set -euo pipefail
554554
555-
# Replace ONLY the stdlib "sync" import with an alias that preserves `sync.X` call sites.
556-
# - `import "sync"` -> `import sync "github.com/sasha-s/go-deadlock"`
557-
# - within import blocks: `"sync"` -> `sync "github.com/sasha-s/go-deadlock"`
558-
# NOTE: use `-i''` (no backup) for portability across GNU/BSD sed.
559-
find . -name '*.go' -not -path './vendor/*' -print0 | xargs -0 sed -i'' -E \
560-
-e 's/^([[:space:]]*)import[[:space:]]+"sync"[[:space:]]*$/\1import sync "github.com\/sasha-s\/go-deadlock"/' \
561-
-e 's/^([[:space:]]*)"sync"[[:space:]]*$/\1sync "github.com\/sasha-s\/go-deadlock"/'
555+
# Find only files that actually use sync.Mutex or sync.RWMutex.
556+
grep -rl --include='*.go' -E 'sync\.(RW)?Mutex' . | grep -v '/vendor/' > /tmp/mutex_files.txt || true
562557
563-
# Keep formatting/import grouping consistent after rewriting.
564-
find . -name '*.go' -not -path './vendor/*' -print0 | xargs -0 gofmt -w
558+
if [ ! -s /tmp/mutex_files.txt ]; then
559+
echo "No files with sync.Mutex or sync.RWMutex found"
560+
exit 0
561+
fi
562+
563+
echo "Patching $(wc -l < /tmp/mutex_files.txt) file(s):"
564+
cat /tmp/mutex_files.txt
565+
566+
while IFS= read -r f; do
567+
# Rewrite type references. RWMutex first so its 'Mutex' suffix isn't clobbered.
568+
sed -i'' -E \
569+
-e 's/sync\.RWMutex/deadlocksync.RWMutex/g' \
570+
-e 's/sync\.Mutex/deadlocksync.Mutex/g' \
571+
"$f"
572+
573+
# \bsync\. won't match inside "deadlocksync.", so this correctly detects
574+
# remaining usages of other sync primitives (WaitGroup, Once, Map, etc.).
575+
if grep -qE '\bsync\.' "$f"; then
576+
# File still needs the sync package; insert deadlocksync import alongside it.
577+
awk '
578+
/^\t"sync"$/ { print; print "\tdeadlocksync \"github.com/sasha-s/go-deadlock\""; next }
579+
{ print }
580+
' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f"
581+
else
582+
# sync package no longer needed; replace its import with the deadlocksync alias.
583+
sed -i'' \
584+
-e 's|^\t"sync"$|\tdeadlocksync "github.com/sasha-s/go-deadlock"|' \
585+
-e 's|^import "sync"$|import deadlocksync "github.com/sasha-s/go-deadlock"|' \
586+
"$f"
587+
fi
588+
done < /tmp/mutex_files.txt
589+
590+
xargs gofmt -w < /tmp/mutex_files.txt
565591
566-
# Evidence in CI logs that rewriting happened (or not).
567592
echo "Changed Go files (after patch):"
568593
git diff --name-only -- '*.go' || true
569594
570-
echo ""
571-
echo "Contents of pkg/scheduling/v1/scheduler.go after patch:"
572-
echo "----"
573-
cat pkg/scheduling/v1/scheduler.go
574-
echo "----"
575-
576595
- name: Test (deadlock-instrumented)
577596
run: |
578597
# Disable gzip compression for load tests to reduce CPU overhead

internal/msgqueue/rabbitmq/gzip.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"compress/gzip"
66
"fmt"
77
"io"
8+
9+
"sync"
810
)
911

1012
type CompressionResult struct {
@@ -17,6 +19,17 @@ type CompressionResult struct {
1719
CompressionRatio float64
1820
}
1921

22+
// gzipWriterPool reuses gzip.Writer instances to avoid repeated allocations.
23+
// No explicit size cap is needed: sync.Pool is self-limiting because the Go
24+
// runtime evicts pooled objects during GC, so the pool cannot grow unbounded.
25+
// In practice the pool size is also bounded by the number of goroutines
26+
// concurrently compressing, which is small for a RabbitMQ publish path.
27+
var gzipWriterPool = sync.Pool{
28+
New: func() any {
29+
return gzip.NewWriter(nil)
30+
},
31+
}
32+
2033
func getPayloadSize(payloads [][]byte) int {
2134
totalSize := 0
2235
for _, payload := range payloads {
@@ -53,17 +66,21 @@ func (t *MessageQueueImpl) compressPayloads(payloads [][]byte) (*CompressionResu
5366

5467
for i, payload := range payloads {
5568
var buf bytes.Buffer
56-
gzipWriter := gzip.NewWriter(&buf)
5769

58-
if _, err := gzipWriter.Write(payload); err != nil {
59-
gzipWriter.Close()
70+
w := gzipWriterPool.Get().(*gzip.Writer)
71+
w.Reset(&buf)
72+
73+
if _, err := w.Write(payload); err != nil {
74+
w.Close()
6075
return nil, fmt.Errorf("failed to write to gzip writer: %w", err)
6176
}
6277

63-
if err := gzipWriter.Close(); err != nil {
78+
if err := w.Close(); err != nil {
6479
return nil, fmt.Errorf("failed to close gzip writer: %w", err)
6580
}
6681

82+
gzipWriterPool.Put(w)
83+
6784
compressed[i] = buf.Bytes()
6885
compressedSize += len(compressed[i])
6986
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package rabbitmq
2+
3+
import (
4+
"bytes"
5+
"crypto/rand"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
// generatePayloads builds count random byte slices of size bytes each,
// panicking if the system's random source fails (test-only helper).
func generatePayloads(count, size int) [][]byte {
	out := make([][]byte, count)
	for i := 0; i < count; i++ {
		buf := make([]byte, size)
		if _, err := rand.Read(buf); err != nil {
			panic(err)
		}
		out[i] = buf
	}
	return out
}
27+
28+
func newMQ() *MessageQueueImpl {
29+
return &MessageQueueImpl{
30+
compressionEnabled: true,
31+
compressionThreshold: 0,
32+
}
33+
}
34+
35+
func TestCompressDecompressRoundtrip(t *testing.T) {
36+
mq := newMQ()
37+
payloads := generatePayloads(5, 10*1024)
38+
result, err := mq.compressPayloads(payloads)
39+
40+
assert.NoError(t, err)
41+
assert.True(t, result.WasCompressed, "expected WasCompressed to be true")
42+
assert.Equal(t, len(payloads), len(result.Payloads), "expected %d payloads, got %d", len(payloads), len(result.Payloads))
43+
44+
decompressed, err := mq.decompressPayloads(result.Payloads)
45+
46+
assert.NoError(t, err)
47+
48+
for i := range payloads {
49+
assert.Equal(t, len(payloads[i]), len(decompressed[i]), "payload %d: expected len %d, got %d", i, len(payloads[i]), len(decompressed[i]))
50+
assert.True(t, bytes.Equal(decompressed[i], payloads[i]), "payload %d: decompressed payload does not match original payload", i)
51+
}
52+
}
53+
54+
func TestCompressPayloadsDisabled(t *testing.T) {
55+
mq := &MessageQueueImpl{
56+
compressionEnabled: false,
57+
compressionThreshold: 0,
58+
}
59+
60+
payloads := generatePayloads(3, 1024)
61+
result, err := mq.compressPayloads(payloads)
62+
63+
assert.NoError(t, err)
64+
assert.False(t, result.WasCompressed, "expected WasCompressed to be false when compression is disabled")
65+
}
66+
67+
func TestCompressPayloadsBelowThreshold(t *testing.T) {
68+
mq := &MessageQueueImpl{
69+
compressionEnabled: true,
70+
compressionThreshold: 100 * 1024,
71+
}
72+
73+
payloads := generatePayloads(1, 1024)
74+
result, err := mq.compressPayloads(payloads)
75+
76+
assert.NoError(t, err)
77+
assert.False(t, result.WasCompressed, "expected WasCompressed to be false when below threshold")
78+
}
79+
80+
func BenchmarkCompressPayloads_1x10KiB(b *testing.B) {
81+
mq := newMQ()
82+
payloads := generatePayloads(1, 10*1024)
83+
b.ResetTimer()
84+
b.ReportAllocs()
85+
for i := 0; i < b.N; i++ {
86+
_, _ = mq.compressPayloads(payloads)
87+
}
88+
}
89+
90+
func BenchmarkCompressPayloads_10x10KiB(b *testing.B) {
91+
mq := newMQ()
92+
payloads := generatePayloads(10, 10*1024)
93+
b.ResetTimer()
94+
b.ReportAllocs()
95+
for i := 0; i < b.N; i++ {
96+
_, _ = mq.compressPayloads(payloads)
97+
}
98+
}
99+
100+
func BenchmarkCompressPayloads_10x100KiB(b *testing.B) {
101+
mq := newMQ()
102+
payloads := generatePayloads(10, 100*1024)
103+
b.ResetTimer()
104+
b.ReportAllocs()
105+
for i := 0; i < b.N; i++ {
106+
_, _ = mq.compressPayloads(payloads)
107+
}
108+
}
109+
110+
func BenchmarkCompressPayloads_Concurrent(b *testing.B) {
111+
mq := newMQ()
112+
payloads := generatePayloads(5, 10*1024)
113+
b.ResetTimer()
114+
b.ReportAllocs()
115+
b.RunParallel(func(pb *testing.PB) {
116+
for pb.Next() {
117+
_, _ = mq.compressPayloads(payloads)
118+
}
119+
})
120+
}

0 commit comments

Comments
 (0)