elastic
diff --git a/‎libbeat/publisher/queue/memqueue/queue_test.go‎
Lines changed: 386 additions & 0 deletions b/‎libbeat/publisher/queue/memqueue/queue_test.go‎
Lines changed: 386 additions & 0 deletions
@@ -442,3 +442,389 @@ func BenchmarkProducerThroughput(b *testing.B) {
 	cancel()
 	testQueue.Close(true)
 }
+
+// ---------------------------------------------------------------------------
+// Behavioral equivalence tests: these verify the guarantees that callers
+// depend on. Every test here must pass on both the old (buffered pushChan +
+// per-event resp channel) and new (unbuffered pushChan, no resp) designs.
+// ---------------------------------------------------------------------------
+
+// TestPublishReturnsOnlyAfterInsert verifies that when Publish returns true,
+// the event is already in the ring buffer (not just buffered in a channel).
+// We confirm this by immediately calling Get after Publish and verifying
+// the event is available.
+func TestPublishReturnsOnlyAfterInsert(t *testing.T) {
+	q := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        100,
+			MaxGetRequest: 100,
+			FlushTimeout:  0, // no flush delay
+		}, 0, nil)
+	defer q.Close(true)
+
+	p := q.Producer(queue.ProducerConfig{})
+
+	// Publish one event
+	_, ok := p.Publish("event-1")
+	require.True(t, ok)
+
+	// The event must be immediately available for consumption
+	batch, err := q.Get(1)
+	require.NoError(t, err)
+	require.Equal(t, 1, batch.Count())
+	batch.Done()
+}
+
+// TestBackpressureBlocksAtQueueCapacity verifies that Publish blocks when
+// the ring buffer is full (backpressure) and unblocks when space is freed.
+func TestBackpressureBlocksAtQueueCapacity(t *testing.T) {
+	const queueSize = 5
+	q := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        queueSize,
+			MaxGetRequest: queueSize,
+			FlushTimeout:  0,
+		}, 0, nil)
+	defer q.Close(true)
+
+	p := q.Producer(queue.ProducerConfig{})
+
+	// Fill the queue to capacity
+	for i := 0; i < queueSize; i++ {
+		_, ok := p.Publish(i)
+		require.True(t, ok, "Publish %d should succeed", i)
+	}
+
+	// Next publish must block (queue is full).
+	// Use a goroutine + timer to detect blocking.
+	published := make(chan bool, 1)
+	go func() {
+		_, ok := p.Publish("overflow")
+		published <- ok
+	}()
+
+	select {
+	case <-published:
+		t.Fatal("Publish should block when queue is full")
+	case <-time.After(50 * time.Millisecond):
+		// Expected: blocked
+	}
+
+	// Free one slot by consuming and acknowledging
+	batch, err := q.Get(1)
+	require.NoError(t, err)
+	batch.Done()
+
+	// The blocked publish should now succeed
+	select {
+	case ok := <-published:
+		require.True(t, ok, "Publish should succeed after space freed")
+	case <-time.After(time.Second):
+		t.Fatal("Publish did not unblock after freeing space")
+	}
+}
+
+// TestShutdownUnblocksProducer verifies that a producer blocked on a full
+// queue is unblocked by queue.Close and Publish returns false.
+func TestShutdownUnblocksProducer(t *testing.T) {
+	const queueSize = 3
+	q := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        queueSize,
+			MaxGetRequest: queueSize,
+			FlushTimeout:  0,
+		}, 0, nil)
+
+	p := q.Producer(queue.ProducerConfig{})
+
+	// Fill the queue
+	for i := 0; i < queueSize; i++ {
+		_, ok := p.Publish(i)
+		require.True(t, ok)
+	}
+
+	// Blocked publish in goroutine
+	result := make(chan bool, 1)
+	go func() {
+		_, ok := p.Publish("blocked")
+		result <- ok
+	}()
+
+	// Give the goroutine time to block
+	time.Sleep(20 * time.Millisecond)
+
+	// Close the queue — should unblock the producer with ok=false
+	q.Close(false)
+
+	select {
+	case ok := <-result:
+		require.False(t, ok, "Publish should return false when queue is closing")
+	case <-time.After(time.Second):
+		t.Fatal("Blocked producer was not unblocked by queue.Close")
+	}
+}
+
+// TestPublishedEqualsAckedOnGracefulShutdown verifies that every event
+// for which Publish returned true is eventually acknowledged, across many
+// concurrent producers during a graceful shutdown. This is the key
+// correctness invariant that the pipeline depends on.
+func TestPublishedEqualsAckedOnGracefulShutdown(t *testing.T) {
+	// Run multiple times to stress-test the shutdown path
+	for iter := 0; iter < 20; iter++ {
+		const queueSize = 500
+		const publishWorkers = 20
+		var ackedCount atomic.Int64
+		var publishedCount atomic.Int64
+
+		testQueue := NewQueue(
+			logp.NewNopLogger(), nil,
+			Settings{
+				Events:        queueSize,
+				MaxGetRequest: queueSize,
+				FlushTimeout:  time.Millisecond,
+			}, 0, nil)
+
+		var wg sync.WaitGroup
+
+		// Publish workers
+		for range publishWorkers {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				producer := testQueue.Producer(
+					queue.ProducerConfig{
+						ACK: func(count int) { ackedCount.Add(int64(count)) },
+					})
+				for {
+					_, published := producer.Publish(0)
+					if published {
+						publishedCount.Add(1)
+					} else {
+						return
+					}
+				}
+			}()
+		}
+
+		// Consumer
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for {
+				batch, err := testQueue.Get(queueSize)
+				if err == nil {
+					batch.Done()
+				} else {
+					return
+				}
+			}
+		}()
+
+		// Let events flow
+		require.Eventually(
+			t,
+			func() bool { return publishedCount.Load() > int64(queueSize) },
+			time.Second,
+			time.Millisecond,
+			"iter %d: events are not flowing", iter)
+
+		// Graceful shutdown
+		testQueue.Close(false)
+		select {
+		case <-testQueue.Done():
+		case <-time.After(5 * time.Second):
+			require.Fail(t, "iter %d: queue did not shut down", iter)
+		}
+		wg.Wait()
+		testQueue.wg.Wait()
+
+		require.Equal(t, publishedCount.Load(), ackedCount.Load(),
+			"iter %d: published (%d) != acked (%d)",
+			iter, publishedCount.Load(), ackedCount.Load())
+	}
+}
+
+// TestMultiProducerAllEventsDelivered verifies no events are lost when
+// multiple producers publish concurrently and all events are consumed.
+func TestMultiProducerAllEventsDelivered(t *testing.T) {
+	const (
+		queueSize      = 200
+		numProducers   = 10
+		eventsPerProducer = 500
+		totalEvents    = numProducers * eventsPerProducer
+	)
+
+	testQueue := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        queueSize,
+			MaxGetRequest: 100,
+			FlushTimeout:  time.Millisecond,
+		}, 0, nil)
+
+	var consumedCount atomic.Int64
+
+	// Consumer: read and acknowledge everything
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for {
+			batch, err := testQueue.Get(100)
+			if err != nil {
+				return
+			}
+			consumedCount.Add(int64(batch.Count()))
+			batch.Done()
+		}
+	}()
+
+	// Producers: each publishes exactly eventsPerProducer events
+	var publishedCount atomic.Int64
+	for i := 0; i < numProducers; i++ {
+		wg.Add(1)
+		go func(producerID int) {
+			defer wg.Done()
+			p := testQueue.Producer(queue.ProducerConfig{})
+			for j := 0; j < eventsPerProducer; j++ {
+				_, ok := p.Publish(fmt.Sprintf("p%d-e%d", producerID, j))
+				if ok {
+					publishedCount.Add(1)
+				}
+			}
+		}(i)
+	}
+
+	// Wait for all events to be consumed
+	require.Eventually(
+		t,
+		func() bool { return consumedCount.Load() == totalEvents },
+		10*time.Second,
+		time.Millisecond,
+		"expected %d consumed events, got %d", totalEvents, consumedCount.Load())
+
+	testQueue.Close(false)
+	<-testQueue.Done()
+	wg.Wait()
+
+	require.Equal(t, int64(totalEvents), publishedCount.Load(),
+		"all Publish calls should succeed")
+	require.Equal(t, int64(totalEvents), consumedCount.Load(),
+		"all published events should be consumed")
+}
+
+// TestTryPublishDropsWhenFull verifies that TryPublish does not succeed
+// when the queue's ring buffer is at capacity.
+func TestTryPublishDropsWhenFull(t *testing.T) {
+	const queueSize = 3
+	q := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        queueSize,
+			MaxGetRequest: queueSize,
+			FlushTimeout:  0,
+		}, 0, nil)
+	defer q.Close(true)
+
+	p := q.Producer(queue.ProducerConfig{})
+
+	// Fill the queue
+	for i := 0; i < queueSize; i++ {
+		_, ok := p.Publish(i)
+		require.True(t, ok)
+	}
+
+	// TryPublish on a full queue should not succeed. On an unbuffered
+	// pushChan it returns false immediately; on a buffered pushChan it may
+	// block (the send succeeds into the buffer but the runLoop never reads
+	// it because the ring buffer is full). Either way, we verify it does
+	// not return true within a short window.
+	result := make(chan bool, 1)
+	go func() {
+		_, ok := p.TryPublish("overflow")
+		result <- ok
+	}()
+
+	select {
+	case ok := <-result:
+		require.False(t, ok, "TryPublish must not return true when queue is full")
+	case <-time.After(100 * time.Millisecond):
+		// Acceptable: TryPublish is blocked, which means it did not succeed
+	}
+}
+
+// TestProducerCloseDoesNotBlockOnFullQueue verifies that closing a producer
+// does not deadlock even when the queue is full.
+func TestProducerCloseDoesNotBlockOnFullQueue(t *testing.T) {
+	const queueSize = 2
+	q := NewQueue(logp.NewNopLogger(), nil,
+		Settings{
+			Events:        queueSize,
+			MaxGetRequest: queueSize,
+			FlushTimeout:  0,
+		}, 0, nil)
+	defer q.Close(true)
+
+	p := q.Producer(queue.ProducerConfig{})
+
+	// Fill the queue
+	for i := 0; i < queueSize; i++ {
+		_, ok := p.Publish(i)
+		require.True(t, ok)
+	}
+
+	// Close the producer — should not deadlock
+	done := make(chan struct{})
+	go func() {
+		p.Close()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		// Success
+	case <-time.After(time.Second):
+		t.Fatal("Producer.Close() deadlocked on full queue")
+	}
+}
+
+// TestRapidCloseAfterPublish verifies that closing the queue immediately
+// after a successful Publish does not lose the event — it should still
+// be counted either as published (and acked) or the Publish returns false.
+func TestRapidCloseAfterPublish(t *testing.T) {
+	for iter := 0; iter < 50; iter++ {
+		var acked atomic.Int64
+		q := NewQueue(logp.NewNopLogger(), nil,
+			Settings{
+				Events:        10,
+				MaxGetRequest: 10,
+				FlushTimeout:  0,
+			}, 0, nil)
+
+		p := q.Producer(queue.ProducerConfig{
+			ACK: func(count int) { acked.Add(int64(count)) },
+		})
+
+		// Consumer
+		go func() {
+			for {
+				batch, err := q.Get(10)
+				if err != nil {
+					return
+				}
+				batch.Done()
+			}
+		}()
+
+		var published int64
+		_, ok := p.Publish("event-1")
+		if ok {
+			published++
+		}
+
+		q.Close(false)
+		<-q.Done()
+
+		// Ensure we never ack more than we published
+		require.GreaterOrEqual(t, published, acked.Load(),
+			"iter %d: acked (%d) > published (%d)", iter, acked.Load(), published)
+	}
+}