test(sources/s3): fix infinite blocking and timeout issue in TestSource_Chunks (#4048)

Juneezee · web-flow · commit a4838d4ebd4c · 2025-04-24T10:06:57.000-04:00
This PR addresses two issues: (1) Duplicate test functions in s3_integration_test.go. The file currently contains two separate TestSourceChunksNoResumption functions. The only differences between them are the use of t.Parallel() in one and the bucket name. This PR merges both into a single, table-driven test. (2) Possible infinite block in TestSource_Chunks. As reported in #4069 (comment), the test TestSource_Chunks in s3_test.go may block indefinitely. This is due to the use of an unbuffered channel (chunksCh), combined with only a single receive operation (gotChunk := <-chunksCh). If the test bucket contains more than one chunk, the s.Chunks(ctx, chunksCh) call will block, causing the test to hang. See #4048 (comment) for the full explanation. * test(sources/s3): remove duplicate integration test case Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * Merge two TestSourceChunksNoResumption into table-driven test Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * Fix infinite blocking and timeout issue in TestSource_Chunks Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * Drain `chunksCh` channel Reference: #4048 (review) Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * Add missing return for ctx.Done() case Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> --------- Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
diff --git a/pkg/sources/s3/s3_integration_test.go b/pkg/sources/s3/s3_integration_test.go
@@ -82,37 +82,6 @@ func TestSource_ChunksLarge(t *testing.T) {
 	assert.Equal(t, got, wantChunkCount)
 }
 
-func TestSourceChunksNoResumption(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
-	defer cancel()
-
-	s := Source{}
-	connection := &sourcespb.S3{
-		Credential: &sourcespb.S3_Unauthenticated{},
-		Buckets:    []string{"trufflesec-ahrav-test-2"},
-	}
-	conn, err := anypb.New(connection)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	err = s.Init(ctx, "test name", 0, 0, false, conn, 1)
-	chunksCh := make(chan *sources.Chunk)
-	go func() {
-		defer close(chunksCh)
-		err = s.Chunks(ctx, chunksCh)
-		assert.Nil(t, err)
-	}()
-
-	wantChunkCount := 19787
-	got := 0
-
-	for range chunksCh {
-		got++
-	}
-	assert.Equal(t, got, wantChunkCount)
-}
-
 func TestSource_Validate(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*15)
 	defer cancel()
@@ -251,34 +220,50 @@ func TestSource_Validate(t *testing.T) {
 func TestSourceChunksNoResumption(t *testing.T) {
 	t.Parallel()
 
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
-	defer cancel()
-
-	s := Source{}
-	connection := &sourcespb.S3{
-		Credential: &sourcespb.S3_Unauthenticated{},
-		Buckets:    []string{"integration-resumption-tests"},
-	}
-	conn, err := anypb.New(connection)
-	if err != nil {
-		t.Fatal(err)
+	tests := []struct {
+		bucket         string
+		wantChunkCount int
+	}{
+		{
+			bucket:         "trufflesec-ahrav-test-2",
+			wantChunkCount: 19787,
+		},
+		{
+			bucket:         "integration-resumption-tests",
+			wantChunkCount: 19787,
+		},
 	}
 
-	err = s.Init(ctx, "test name", 0, 0, false, conn, 1)
-	chunksCh := make(chan *sources.Chunk)
-	go func() {
-		defer close(chunksCh)
-		err = s.Chunks(ctx, chunksCh)
-		assert.Nil(t, err)
-	}()
-
-	wantChunkCount := 19787
-	got := 0
+	for _, tt := range tests {
+		t.Run(tt.bucket, func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
+			defer cancel()
+
+			s := Source{}
+			connection := &sourcespb.S3{
+				Credential: &sourcespb.S3_Unauthenticated{},
+				Buckets:    []string{tt.bucket},
+			}
+			conn, err := anypb.New(connection)
+			if err != nil {
+				t.Fatal(err)
+			}
 
-	for range chunksCh {
-		got++
+			err = s.Init(ctx, "test name", 0, 0, false, conn, 1)
+			chunksCh := make(chan *sources.Chunk)
+			go func() {
+				defer close(chunksCh)
+				err = s.Chunks(ctx, chunksCh)
+				assert.Nil(t, err)
+			}()
+
+			got := 0
+			for range chunksCh {
+				got++
+			}
+			assert.Equal(t, tt.wantChunkCount, got)
+		})
 	}
-	assert.Equal(t, wantChunkCount, got)
 }
 
 func TestSourceChunksResumption(t *testing.T) {
diff --git a/pkg/sources/s3/s3_test.go b/pkg/sources/s3/s3_test.go
@@ -4,7 +4,6 @@ import (
 	"encoding/base64"
 	"fmt"
 	"os"
-	"sync"
 	"testing"
 	"time"
 
@@ -99,8 +98,7 @@ func TestSource_Chunks(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
-			var cancelOnce sync.Once
-			defer cancelOnce.Do(cancel)
+			defer cancel()
 
 			for k, v := range tt.init.setEnv {
 				t.Setenv(k, v)
@@ -117,26 +115,47 @@ func TestSource_Chunks(t *testing.T) {
 				t.Errorf("Source.Init() error = %v, wantErr %v", err, tt.wantErr)
 				return
 			}
-			chunksCh := make(chan *sources.Chunk)
-			var wg sync.WaitGroup
-			wg.Add(1)
+			chunksCh := make(chan *sources.Chunk, 1)
 			go func() {
-				defer wg.Done()
+				defer close(chunksCh)
 				err = s.Chunks(ctx, chunksCh)
 				if (err != nil) != tt.wantErr {
 					t.Errorf("Source.Chunks() error = %v, wantErr %v", err, tt.wantErr)
 					os.Exit(1)
 				}
 			}()
-			gotChunk := <-chunksCh
-			wantData, _ := base64.StdEncoding.DecodeString(tt.wantChunkData)
 
-			if diff := pretty.Compare(gotChunk.Data, wantData); diff != "" {
-				t.Errorf("%s: Source.Chunks() diff: (-got +want)\n%s", tt.name, diff)
+			waitFn := func() {
+				receivedFirstChunk := false
+				for {
+					select {
+					case <-ctx.Done():
+						t.Errorf("TestSource_Chunks timed out: %v", ctx.Err())
+						return
+					case gotChunk, ok := <-chunksCh:
+						if !ok {
+							t.Logf("Source.Chunks() finished, channel closed")
+							assert.Equal(t, "", s.GetProgress().EncodedResumeInfo)
+							assert.Equal(t, int64(100), s.GetProgress().PercentComplete)
+							return
+						}
+						if receivedFirstChunk {
+							// wantChunkData is the first chunk data. After the first chunk has
+							// been received and matched below, we want to drain chunksCh
+							// so Source.Chunks() can finish completely.
+							continue
+						}
+
+						receivedFirstChunk = true
+						wantData, _ := base64.StdEncoding.DecodeString(tt.wantChunkData)
+
+						if diff := pretty.Compare(gotChunk.Data, wantData); diff != "" {
+							t.Logf("%s: Source.Chunks() diff: (-got +want)\n%s", tt.name, diff)
+						}
+					}
+				}
 			}
-			wg.Wait()
-			assert.Equal(t, "", s.GetProgress().EncodedResumeInfo)
-			assert.Equal(t, int64(100), s.GetProgress().PercentComplete)
+			waitFn()
 		})
 	}
 }