GODRIVER-1599 Expose batch length in Cursor (#402)

Divjot Arora · web-flow · commit ac7f51c1a3eb · 2020-05-07T16:35:55.000-04:00
diff --git a/mongo/crud_examples_test.go b/mongo/crud_examples_test.go
@@ -708,6 +708,43 @@ func ExampleCursor_TryNext() {
 	}
 }
 
+func ExampleCursor_RemainingBatchLength() {
+	// A tailable cursor requires a capped collection, so coll must be a handle to one. It is left unset in this example.
+	var coll *mongo.Collection
+
+	// Create a tailable await cursor. Setting MaxAwaitTime to two seconds means a request to get more data will return
+	// if no documents are available after that long.
+	findOpts := options.Find().
+		SetCursorType(options.TailableAwait).
+		SetMaxAwaitTime(2 * time.Second)
+	cursor, err := coll.Find(context.TODO(), bson.D{}, findOpts)
+	if err != nil {
+		panic(err)
+	}
+
+	for {
+		// Try to advance the cursor; print the document if one was available.
+		if cursor.TryNext(context.TODO()) {
+			fmt.Println(cursor.Current)
+		}
+
+		// A false TryNext result is not an error by itself, so check for a cursor error or a server-side close.
+		if err = cursor.Err(); err != nil {
+			panic(err)
+		}
+		if cursor.ID() == 0 {
+			panic("cursor was unexpectedly closed by the server")
+		}
+
+		// Use RemainingBatchLength to rate-limit the number of network requests the driver does. If the current
+		// batch is empty, the next TryNext call will do a network request, so sleep for a short amount of time
+		// first to let documents build up on the server.
+		if cursor.RemainingBatchLength() == 0 {
+			time.Sleep(100 * time.Millisecond)
+		}
+	}
+}
+
 // ChangeStream examples
 
 func ExampleChangeStream_Next() {
diff --git a/mongo/cursor.go b/mongo/cursor.go
@@ -28,6 +28,7 @@ type Cursor struct {
 
 	bc            batchCursor
 	batch         *bsoncore.DocumentSequence
+	batchLength   int
 	registry      *bsoncodec.Registry
 	clientSession *session.Client
 
@@ -53,6 +54,10 @@ func newCursorWithSession(bc batchCursor, registry *bsoncodec.Registry, clientSe
 	if bc.ID() == 0 {
 		c.closeImplicitSession()
 	}
+
+	// Initialize just the batchLength here so RemainingBatchLength will return an accurate result. The actual batch
+	// will be pulled up by the first Next/TryNext call.
+	c.batchLength = c.bc.Batch().DocumentCount()
 	return c, nil
 }
 
@@ -102,6 +107,8 @@ func (c *Cursor) next(ctx context.Context, nonBlocking bool) bool {
 	doc, err := c.batch.Next()
 	switch err {
 	case nil:
+		// Consume the next document in the current batch.
+		c.batchLength--
 		c.Current = bson.Raw(doc)
 		return true
 	case io.EOF: // Need to do a getMore
@@ -138,10 +145,13 @@ func (c *Cursor) next(ctx context.Context, nonBlocking bool) bool {
 			c.closeImplicitSession()
 		}
 
+		// Use the new batch to update the batch and batchLength fields. Consume the first document in the batch.
 		c.batch = c.bc.Batch()
+		c.batchLength = c.batch.DocumentCount()
 		doc, err = c.batch.Next()
 		switch err {
 		case nil:
+			c.batchLength--
 			c.Current = bson.Raw(doc)
 			return true
 		case io.EOF: // Empty batch so we continue
@@ -208,6 +218,12 @@ func (c *Cursor) All(ctx context.Context, results interface{}) error {
 	return nil
 }
 
+// RemainingBatchLength returns the number of documents in the current batch that Next or TryNext has not yet consumed.
+// If this returns zero, the subsequent call to Next or TryNext will do a network request to fetch the next batch.
+func (c *Cursor) RemainingBatchLength() int {
+	return c.batchLength
+}
+
 // addFromBatch adds all documents from batch to sliceVal starting at the given index. It returns the new slice value,
 // the next empty index in the slice, and an error if one occurs.
 func (c *Cursor) addFromBatch(sliceVal reflect.Value, elemType reflect.Type, batch *bsoncore.DocumentSequence,
diff --git a/mongo/integration/cursor_test.go b/mongo/integration/cursor_test.go
@@ -9,6 +9,7 @@ package integration
 import (
 	"context"
 	"testing"
+	"time"
 
 	"go.mongodb.org/mongo-driver/bson"
 	"go.mongodb.org/mongo-driver/internal/testutil/assert"
@@ -24,6 +25,7 @@ const (
 func TestCursor(t *testing.T) {
 	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))
 	defer mt.Close()
+	cappedCollectionOpts := bson.D{{"capped", true}, {"size", 64 * 1024}}
 
 	// server versions 2.6 and 3.0 use OP_GET_MORE so this works on >= 3.2
 	mt.RunOpts("cursor is killed on server", mtest.NewOptions().MinServerVersion("3.2"), func(mt *mtest.T) {
@@ -53,8 +55,7 @@ func TestCursor(t *testing.T) {
 			defer cursor.Close(mtest.Background)
 			tryNextExistingBatchTest(mt, cursor)
 		})
-		cappedOpts := bson.D{{"capped", true}, {"size", 64 * 1024}}
-		mt.RunOpts("one getMore sent", mtest.NewOptions().CollectionCreateOptions(cappedOpts), func(mt *mtest.T) {
+		mt.RunOpts("one getMore sent", mtest.NewOptions().CollectionCreateOptions(cappedCollectionOpts), func(mt *mtest.T) {
 			// If the current batch is empty, TryNext should send one getMore and return.
 
 			// insert a document because a tailable cursor will only have a non-zero ID if the initial Find matches
@@ -82,6 +83,88 @@ func TestCursor(t *testing.T) {
 			tryNextGetmoreError(mt, cursor)
 		})
 	})
+	mt.RunOpts("RemainingBatchLength", noClientOpts, func(mt *mtest.T) {
+		cappedMtOpts := mtest.NewOptions().CollectionCreateOptions(cappedCollectionOpts)
+		mt.RunOpts("first batch is non empty", cappedMtOpts, func(mt *mtest.T) {
+			// Test that the cursor reports the correct value for RemainingBatchLength at various execution points if
+			// the first batch from the server is non-empty.
+
+			initCollection(mt, mt.Coll)
+
+			// Create a tailable await cursor with a low cursor timeout.
+			batchSize := 2
+			findOpts := options.Find().
+				SetBatchSize(int32(batchSize)).
+				SetCursorType(options.TailableAwait).
+				SetMaxAwaitTime(100 * time.Millisecond)
+			cursor, err := mt.Coll.Find(mtest.Background, bson.D{}, findOpts)
+			assert.Nil(mt, err, "Find error: %v", err)
+			defer cursor.Close(mtest.Background)
+
+			mt.ClearEvents()
+
+			// The initial batch length should be equal to the batchSize. Do batchSize Next calls to exhaust the current
+			// batch and assert that no getMore was done.
+			assertCursorBatchLength(mt, cursor, batchSize)
+			for i := 0; i < batchSize; i++ {
+				prevLength := cursor.RemainingBatchLength()
+				if !cursor.Next(mtest.Background) {
+					mt.Fatalf("expected Next to return true on index %d; cursor err: %v", i, cursor.Err())
+				}
+
+				// Each successful Next call should decrement batch length by 1.
+				assertCursorBatchLength(mt, cursor, prevLength-1)
+			}
+			evt := mt.GetStartedEvent()
+			assert.Nil(mt, evt, "expected no events, got %v", evt)
+
+			// The batch is exhausted, so the batch length should be 0. Do one Next call, which should do a getMore and
+			// fetch batchSize more documents. The batch length after the call should be (batchSize-1) because Next consumes
+			// one document.
+			assertCursorBatchLength(mt, cursor, 0)
+
+			assert.True(mt, cursor.Next(mtest.Background), "expected Next to return true; cursor err: %v", cursor.Err())
+			evt = mt.GetStartedEvent()
+			assert.NotNil(mt, evt, "expected CommandStartedEvent, got nil")
+			assert.Equal(mt, "getMore", evt.CommandName, "expected command %q, got %q", "getMore", evt.CommandName)
+
+			assertCursorBatchLength(mt, cursor, batchSize-1)
+		})
+		mt.RunOpts("first batch is empty", mtest.NewOptions().ClientType(mtest.Mock), func(mt *mtest.T) {
+			// Test that the cursor reports the correct value for RemainingBatchLength if the first batch is empty.
+			// Using a mock deployment simplifies this test because the server won't create a valid cursor if the
+			// collection is empty when the find is run.
+
+			cursorID := int64(50)
+			ns := mt.DB.Name() + "." + mt.Coll.Name()
+			getMoreBatch := []bson.D{
+				{{"x", 1}},
+				{{"x", 2}},
+			}
+
+			// Create mock responses: an empty first batch for find, a getMore batch with documents, and a success reply.
+			find := mtest.CreateCursorResponse(cursorID, ns, mtest.FirstBatch)
+			getMore := mtest.CreateCursorResponse(cursorID, ns, mtest.NextBatch, getMoreBatch...)
+			killCursors := mtest.CreateSuccessResponse()
+			mt.AddMockResponses(find, getMore, killCursors)
+
+			cursor, err := mt.Coll.Find(mtest.Background, bson.D{})
+			assert.Nil(mt, err, "Find error: %v", err)
+			defer cursor.Close(mtest.Background)
+			mt.ClearEvents()
+
+			for {
+				if cursor.TryNext(mtest.Background) {
+					break
+				}
+
+				assert.Nil(mt, cursor.Err(), "cursor error: %v", cursor.Err())
+				assertCursorBatchLength(mt, cursor, 0)
+			}
+			// TryNext consumes one document so the remaining batch size should be len(getMoreBatch)-1.
+			assertCursorBatchLength(mt, cursor, len(getMoreBatch)-1)
+		})
+	})
 }
 
 type tryNextCursor interface {
@@ -133,3 +216,8 @@ func tryNextGetmoreError(mt *mtest.T, cursor tryNextCursor) {
 	err := cursor.Err()
 	assert.NotNil(mt, err, "expected change stream error, got nil")
 }
+
+func assertCursorBatchLength(mt *mtest.T, cursor *mongo.Cursor, expected int) {
+	got := cursor.RemainingBatchLength() // read once so the comparison and the failure message report the same value
+	assert.Equal(mt, expected, got, "expected remaining batch length %d, got %d", expected, got)
+}