
Commit 6999fb8

Merge pull request #1236 from twmb/1195
kgo: unlinger partitions in ProduceSync to avoid linger delay
2 parents: dba723b + 764eb29

3 files changed: 132 additions & 11 deletions

pkg/kfake/behavior_test.go

Lines changed: 52 additions & 0 deletions
@@ -1785,4 +1785,56 @@ func TestTxnDescribeTransactions(t *testing.T) {
     }
 }
 
+// TestProduceSyncUnlinger verifies that ProduceSync does not wait for the full
+// linger duration before completing. With a 10s linger, ProduceSync should
+// still return quickly because it unlingers partitions after enqueuing records.
+func TestProduceSyncUnlinger(t *testing.T) {
+    t.Parallel()
+    topic := "produce-sync-unlinger"
+    c := newCluster(t, kfake.NumBrokers(1), kfake.SeedTopics(1, topic))
+
+    producer := newPlainClient(t, c,
+        kgo.DefaultProduceTopic(topic),
+        kgo.ProducerLinger(10*time.Second),
+    )
+
+    // Produce one record and flush to load topic metadata and
+    // establish connections. Without this, the first ProduceSync
+    // would buffer to the unknown-topic path and not benefit from
+    // the unlinger optimization.
+    ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+    defer cancel()
+    producer.Produce(ctx, kgo.StringRecord("warmup"), nil)
+    if err := producer.Flush(ctx); err != nil {
+        t.Fatalf("warmup flush failed: %v", err)
+    }
+
+    start := time.Now()
+    results := producer.ProduceSync(ctx, kgo.StringRecord("v1"), kgo.StringRecord("v2"), kgo.StringRecord("v3"))
+    elapsed := time.Since(start)
+
+    if err := results.FirstErr(); err != nil {
+        t.Fatalf("ProduceSync failed: %v", err)
+    }
+    if len(results) != 3 {
+        t.Fatalf("expected 3 results, got %d", len(results))
+    }
+
+    // With the unlinger fix, ProduceSync should complete well within 5s
+    // despite the 10s linger. Without the fix, it would block for 10s.
+    if elapsed > 5*time.Second {
+        t.Fatalf("ProduceSync took %v, expected well under 5s with unlinger", elapsed)
+    }
+
+    // Verify all records are consumable (warmup + 3 = 4 records total).
+    consumer := newPlainClient(t, c,
+        kgo.ConsumeTopics(topic),
+        kgo.ConsumeResetOffset(kgo.NewOffset().AtStart()),
+    )
+    records := consumeN(t, consumer, 4, 5*time.Second)
+    if len(records) != 4 {
+        t.Fatalf("expected 4 consumed records, got %d", len(records))
+    }
+}
+
 func stringp(s string) *string { return &s }
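Note: newCluster, newPlainClient, and consumeN above are helpers internal to this repo's test harness. A rough standalone equivalent of the setup, sketched against the public kfake and kgo packages (option signatures inferred from this diff, so treat it as an approximation rather than the test's actual code):

package main

import (
    "time"

    "github.com/twmb/franz-go/pkg/kfake"
    "github.com/twmb/franz-go/pkg/kgo"
)

func main() {
    topic := "produce-sync-unlinger"

    // In-memory kfake cluster seeded with one single-partition topic,
    // mirroring newCluster(t, kfake.NumBrokers(1), kfake.SeedTopics(1, topic)).
    c, err := kfake.NewCluster(kfake.NumBrokers(1), kfake.SeedTopics(1, topic))
    if err != nil {
        panic(err)
    }
    defer c.Close()

    // A plain client pointed at the fake cluster with the same 10s linger,
    // mirroring newPlainClient in the test above.
    cl, err := kgo.NewClient(
        kgo.SeedBrokers(c.ListenAddrs()...),
        kgo.DefaultProduceTopic(topic),
        kgo.ProducerLinger(10*time.Second),
    )
    if err != nil {
        panic(err)
    }
    defer cl.Close()

    // Produce and consume as in the test body above.
}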

pkg/kgo/producer.go

Lines changed: 74 additions & 4 deletions
@@ -311,8 +311,13 @@ func (rs ProduceResults) First() (*Record, error) {
 // ProduceSync is a synchronous produce. See the [Produce] documentation for an
 // in depth description of how producing works.
 //
-// This function simply produces all records in one range loop and waits for
-// them all to be produced before returning.
+// This function produces all records and waits for them all to be produced
+// before returning. If the client has a non-zero linger configured, after all
+// records are enqueued, this function stops lingering and triggers an immediate
+// drain on all partitions that records were produced to. This avoids
+// unnecessarily waiting for linger timers when the caller is synchronously
+// waiting for results. Partitions that are lingering due to concurrent
+// [Produce] calls are not affected.
 func (cl *Client) ProduceSync(ctx context.Context, rs ...*Record) ProduceResults {
     var (
         wg sync.WaitGroup
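For context on what the new doc comment promises, a minimal caller-side sketch (broker address and topic are placeholders, not part of this change): with a 10s linger configured, ProduceSync now returns once the produced partitions drain rather than after the linger expires, provided topic metadata is already loaded (see the warmup note in the test above).

package main

import (
    "context"
    "fmt"
    "time"

    "github.com/twmb/franz-go/pkg/kgo"
)

func main() {
    cl, err := kgo.NewClient(
        kgo.SeedBrokers("localhost:9092"),  // placeholder broker
        kgo.DefaultProduceTopic("events"),  // placeholder topic
        kgo.ProducerLinger(10*time.Second), // previously delayed ProduceSync by up to 10s
    )
    if err != nil {
        panic(err)
    }
    defer cl.Close()

    ctx := context.Background()

    // With this commit, ProduceSync unlingers the partitions it produced to,
    // so this returns once the records are written rather than ~10s later.
    start := time.Now()
    if err := cl.ProduceSync(ctx, kgo.StringRecord("v1"), kgo.StringRecord("v2")).FirstErr(); err != nil {
        panic(err)
    }
    fmt.Println("produced in", time.Since(start))
}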
@@ -324,9 +329,76 @@ func (cl *Client) ProduceSync(ctx context.Context, rs ...*Record) ProduceResults
     )
 
     wg.Add(len(rs))
+
+    // After each Produce call for a known topic, the record's Partition
+    // field is already set (see bufferRecord), allowing us to collect
+    // which recBufs to unlinger without a second pass over the records.
+    // We use a [16] base array to avoid heap allocation in the common
+    // case, and linear dedup since the number of unique partitions is
+    // typically small.
+    //
+    // We load partition data BEFORE calling Produce to avoid a data
+    // race on r.Partition. If partitions exist before Produce,
+    // partitionsForTopicProduce will also see them (partition counts
+    // are monotonically increasing) and will partition the record
+    // synchronously in bufferRecord, making r.Partition safe to read
+    // after Produce returns. If pd is nil, we never read r.Partition,
+    // avoiding a race with the metadata goroutine which partitions
+    // unknownTopics records asynchronously.
+    var (
+        buf      [16]*recBuf
+        unlinger = buf[:0]
+        topics   topicsPartitionsData
+
+        lastTopic string
+        lastPD    *topicPartitionsData
+    )
+    if cl.cfg.linger > 0 {
+        topics = cl.producer.topics.load()
+    }
+
     for _, r := range rs {
+        var pd *topicPartitionsData
+        if topics != nil {
+            if r.Topic == "" || cl.cfg.defaultProduceTopicAlways {
+                r.Topic = cl.cfg.defaultProduceTopic
+            }
+            if r.Topic == lastTopic {
+                pd = lastPD
+            } else if parts, ok := topics[r.Topic]; ok {
+                if v := parts.load(); len(v.partitions) > 0 {
+                    pd = v
+                }
+                lastTopic = r.Topic
+                lastPD = pd
+            }
+        }
+
         cl.Produce(ctx, r, promise)
+
+        if pd == nil {
+            continue
+        }
+        if int(r.Partition) >= len(pd.partitions) {
+            continue
+        }
+        rb := pd.partitions[r.Partition].records
+        var seen bool
+        for _, have := range unlinger {
+            if have == rb {
+                seen = true
+                break
+            }
+        }
+        if !seen {
+            unlinger = append(unlinger, rb)
+        }
     }
+
+    for _, rb := range unlinger {
+        rb.unlingerAndManuallyDrain()
+    }
+
     wg.Wait()
 
     return results
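The buf [16] plus linear-scan approach described in the comment above is a stack-friendly dedup idiom; a standalone sketch of the same idea with placeholder types (not franz-go internals):

package main

import "fmt"

// recBuf stands in for kgo's per-partition record buffer; only pointer
// identity matters for the dedup.
type recBuf struct{ name string }

func main() {
    a, b := &recBuf{"t-0"}, &recBuf{"t-1"}
    producedTo := []*recBuf{a, b, a, b, a}

    // A small fixed backing array keeps the common case free of a separate
    // slice allocation; append only reallocates past 16 distinct buffers.
    var buf [16]*recBuf
    unlinger := buf[:0]

    for _, rb := range producedTo {
        seen := false
        for _, have := range unlinger {
            if have == rb {
                seen = true
                break
            }
        }
        if !seen {
            unlinger = append(unlinger, rb)
        }
    }

    fmt.Println(len(unlinger)) // 2
}

A map would also work, but for a handful of pointers the linear scan avoids the map allocation and hashing entirely, which matches the reasoning in the diff's comment.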
@@ -594,7 +666,6 @@ type batchPromise struct {
     epoch     int16
     attrs     RecordAttrs
     beforeBuf bool
-    partition int32
     recs      []promisedRec
     err       error
 }
@@ -632,7 +703,6 @@ start:
         } else {
             pr.Offset = b.baseOffset + int64(i)
         }
-        pr.Partition = b.partition
         pr.ProducerID = b.pid
         pr.ProducerEpoch = b.epoch
         pr.Attrs = b.attrs

pkg/kgo/sink.go

Lines changed: 6 additions & 7 deletions
@@ -688,7 +688,7 @@ func (s *sink) handleReqRespNoack(b *bytes.Buffer, debug bool, req *produceRequest) {
             if debug {
                 fmt.Fprintf(b, "%d{0=>%d}, ", partition, len(batch.records))
             }
-            s.cl.finishBatch(batch.recBatch, req.producerID, req.producerEpoch, partition, 0, nil)
+            s.cl.finishBatch(batch.recBatch, req.producerID, req.producerEpoch, 0, nil)
         } else if debug {
             fmt.Fprintf(b, "%d{skipped}, ", partition)
         }
@@ -979,7 +979,7 @@ func (s *sink) handleReqRespBatch(
         )
         s.cl.failProducerID(producerID, producerEpoch, err)
 
-        s.cl.finishBatch(batch.recBatch, producerID, producerEpoch, rp.Partition, rp.BaseOffset, err)
+        s.cl.finishBatch(batch.recBatch, producerID, producerEpoch, rp.BaseOffset, err)
         if debug {
             fmt.Fprintf(b, "fatal@%d,%d(%s)}, ", rp.BaseOffset, nrec, err)
         }
@@ -1043,7 +1043,7 @@ func (s *sink) handleReqRespBatch(
                 batch.owner.addedToTxn.Swap(true)
             }
         }
-        s.cl.finishBatch(batch.recBatch, producerID, producerEpoch, rp.Partition, rp.BaseOffset, err)
+        s.cl.finishBatch(batch.recBatch, producerID, producerEpoch, rp.BaseOffset, err)
         didProduce = err == nil
         if debug {
             if err != nil {
@@ -1061,7 +1061,7 @@ func (s *sink) handleReqRespBatch(
 //
 // This is safe even if the owning recBuf migrated sinks, since we are
 // finishing based off the status of an inflight req from the original sink.
-func (cl *Client) finishBatch(batch *recBatch, producerID int64, producerEpoch int16, partition int32, baseOffset int64, err error) {
+func (cl *Client) finishBatch(batch *recBatch, producerID int64, producerEpoch int16, baseOffset int64, err error) {
     recBuf := batch.owner
 
     if err != nil {
@@ -1095,9 +1095,8 @@ func (cl *Client) finishBatch(batch *recBatch, producerID int64, producerEpoch i
         // corresponding to our own RecordAttr's bit 8 being no
         // timestamp type. Thus, we can directly convert the batch
         // attrs to our own RecordAttrs.
-        attrs:     RecordAttrs{uint8(attrs)},
-        partition: partition,
-        recs:      records,
+        attrs: RecordAttrs{uint8(attrs)},
+        recs:  records,
     })
 }
 