atris
diff --git a/‎server/src/main/java/org/opensearch/action/ActionModule.java‎
Lines changed: 0 additions & 1 deletion b/‎server/src/main/java/org/opensearch/action/ActionModule.java‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎server/src/main/java/org/opensearch/action/search/SearchPhaseController.java‎
Lines changed: 2 additions & 3 deletions b/‎server/src/main/java/org/opensearch/action/search/SearchPhaseController.java‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎server/src/main/java/org/opensearch/action/search/SearchRequest.java‎
Lines changed: 1 addition & 1 deletion b/‎server/src/main/java/org/opensearch/action/search/SearchRequest.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎server/src/main/java/org/opensearch/action/search/StreamQueryPhaseResultConsumer.java‎
Lines changed: 56 additions & 18 deletions b/‎server/src/main/java/org/opensearch/action/search/StreamQueryPhaseResultConsumer.java‎
Lines changed: 56 additions & 18 deletions
diff --git a/‎server/src/main/java/org/opensearch/action/search/StreamSearchQueryThenFetchAsyncAction.java‎
Lines changed: 1 addition & 20 deletions b/‎server/src/main/java/org/opensearch/action/search/StreamSearchQueryThenFetchAsyncAction.java‎
Lines changed: 1 addition & 20 deletions
diff --git a/‎server/src/main/java/org/opensearch/action/search/StreamingSearchProgressListener.java‎
Lines changed: 37 additions & 5 deletions b/‎server/src/main/java/org/opensearch/action/search/StreamingSearchProgressListener.java‎
Lines changed: 37 additions & 5 deletions
diff --git a/‎server/src/main/java/org/opensearch/action/search/StreamingSearchResponseListener.java‎
Lines changed: 10 additions & 2 deletions b/‎server/src/main/java/org/opensearch/action/search/StreamingSearchResponseListener.java‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎server/src/main/java/org/opensearch/action/search/TransportSearchAction.java‎
Lines changed: 0 additions & 2 deletions b/‎server/src/main/java/org/opensearch/action/search/TransportSearchAction.java‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎server/src/main/java/org/opensearch/common/settings/ClusterSettings.java‎
Lines changed: 1 addition & 1 deletion b/‎server/src/main/java/org/opensearch/common/settings/ClusterSettings.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java‎
Lines changed: 5 additions & 1 deletion b/‎server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java‎
Lines changed: 5 additions & 1 deletion
@@ -286,7 +286,6 @@
 import org.opensearch.action.search.PutSearchPipelineTransportAction;
 import org.opensearch.action.search.SearchAction;
 import org.opensearch.action.search.SearchScrollAction;
-import org.opensearch.action.search.StreamSearchAction;
 import org.opensearch.action.search.TransportClearScrollAction;
 import org.opensearch.action.search.TransportCreatePitAction;
 import org.opensearch.action.search.TransportDeletePitAction;
 
@@ -32,10 +32,10 @@
 
 package org.opensearch.action.search;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.CollectionStatistics;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
@@ -848,7 +848,6 @@ QueryPhaseResultConsumer newSearchPhaseResults(
         }
     }
 
-
     /**
      * The top docs statistics
      *
 
@@ -679,7 +679,7 @@ public void setStreamingScoring(boolean streamingScoring) {
     public boolean isStreamingScoring() {
         return streamingScoring;
     }
-    
+
     /**
      * Sets the streaming search mode for this request.
      * @param mode The streaming search mode to use
 
@@ -13,30 +13,29 @@
 import org.opensearch.common.settings.ClusterSettings;
 import org.opensearch.core.common.breaker.CircuitBreaker;
 import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
-import org.opensearch.search.streaming.StreamingSearchSettings;
 import org.opensearch.search.SearchPhaseResult;
 import org.opensearch.search.query.QuerySearchResult;
 import org.opensearch.search.query.StreamingSearchMode;
-
-import java.util.concurrent.atomic.AtomicInteger;
+import org.opensearch.search.streaming.StreamingSearchSettings;
 
 import java.util.concurrent.Executor;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
 
 /**
  * Query phase result consumer for streaming search.
  * Supports progressive batch reduction with configurable scoring modes.
- * 
+ *
  * Batch reduction frequency is controlled by per-mode multipliers from cluster settings:
  * - NO_SCORING: Immediate reduction (batch size = 1) for fastest time-to-first-byte
  * - SCORED_UNSORTED: Small batches controlled by search.streaming.scored_unsorted.batch_multiplier (default: 2)
  * - CONFIDENCE_BASED: Moderate batches controlled by search.streaming.confidence.batch_multiplier (default: 3)
  * - SCORED_SORTED: Larger batches controlled by search.streaming.scored_sorted.batch_multiplier (default: 10)
- * 
+ *
  * These multipliers are applied to the base batch reduce size (typically 5) to determine
  * how many shard results are accumulated before triggering a partial reduction. Lower values
  * mean more frequent reductions and faster streaming, but higher coordinator CPU usage.
- * 
+ *
  * ClusterSettings must be provided (non-null) to enable dynamic configuration. Tests should
  * provide a properly configured ClusterSettings instance rather than null.
  *
@@ -45,14 +44,20 @@
 public class StreamQueryPhaseResultConsumer extends QueryPhaseResultConsumer {
 
     private static final Logger logger = LogManager.getLogger(StreamQueryPhaseResultConsumer.class);
-    
+
     private final StreamingSearchMode scoringMode;
     private final ClusterSettings clusterSettings;
     private int resultsReceived = 0;
-    
+
+    // TTFB tracking for demonstrating fetch phase timing
+    private long queryStartTime = System.currentTimeMillis();
+    private long firstBatchReadyForFetchTime = -1;
+    private boolean firstBatchReadyForFetch = false;
+    private final AtomicInteger batchesReduced = new AtomicInteger(0);
+
     /**
      * Creates a streaming query phase result consumer.
-     * 
+     *
      * @param clusterSettings cluster settings for dynamic multipliers (must not be null)
      */
     public StreamQueryPhaseResultConsumer(
@@ -76,11 +81,11 @@ public StreamQueryPhaseResultConsumer(
             expectedResultSize,
             onPartialMergeFailure
         );
-        
+
         // Initialize scoring mode from request
         String mode = request.getStreamingSearchMode();
         this.scoringMode = (mode != null) ? StreamingSearchMode.fromString(mode) : StreamingSearchMode.SCORED_SORTED;
-        
+
         // ClusterSettings is required for dynamic configuration
         if (clusterSettings == null) {
             throw new IllegalArgumentException("ClusterSettings must not be null for StreamQueryPhaseResultConsumer");
@@ -101,7 +106,7 @@ int getBatchReduceSize(int requestBatchedReduceSize, int minBatchReduceSize) {
         if (scoringMode == null || clusterSettings == null) {
             return super.getBatchReduceSize(requestBatchedReduceSize, minBatchReduceSize * 10);
         }
-        
+
         switch (scoringMode) {
             case NO_SCORING:
                 // Reduce immediately for fastest TTFB (similar to streaming aggs with low batch size)
@@ -127,7 +132,7 @@ int getBatchReduceSize(int requestBatchedReduceSize, int minBatchReduceSize) {
     /**
      * Consume streaming results with frequency-based emission
      */
-    void consumeStreamResult(SearchPhaseResult result, Runnable next) {
+    public void consumeStreamResult(SearchPhaseResult result, Runnable next) {
         QuerySearchResult querySearchResult = result.queryResult();
 
         // Check if already consumed
@@ -138,13 +143,46 @@ void consumeStreamResult(SearchPhaseResult result, Runnable next) {
         }
 
         resultsReceived++;
-        logger.debug("Consumed result #{} from shard {}, partial={}, hasTopDocs={}", 
-                    resultsReceived, result.getShardIndex(), querySearchResult.isPartial(), 
-                    querySearchResult.topDocs() != null);
-        
+
+        // Track when first batch is ready for fetch phase
+        // Use the batch size that was configured for this mode
+        int batchSize = getBatchReduceSize(Integer.MAX_VALUE, 5);
+        if (!firstBatchReadyForFetch && resultsReceived >= batchSize) {
+            firstBatchReadyForFetch = true;
+            firstBatchReadyForFetchTime = System.currentTimeMillis();
+            long ttfb = firstBatchReadyForFetchTime - queryStartTime;
+            logger.info(
+                "STREAMING TTFB: First batch ready for fetch after {} ms with {} results (batch size: {})",
+                ttfb,
+                resultsReceived,
+                batchSize
+            );
+        }
+
+        logger.debug(
+            "Consumed result #{} from shard {}, partial={}, hasTopDocs={}",
+            resultsReceived,
+            result.getShardIndex(),
+            querySearchResult.isPartial(),
+            querySearchResult.topDocs() != null
+        );
+
         // Use parent's pendingMerges to consume the result
         // Partial reduces are automatically triggered by batchReduceSize
         pendingMerges.consume(querySearchResult, next);
     }
-}
 
+    /**
+     * Returns the milliseconds elapsed between this consumer's creation and the first full batch,
+     * or -1 while no first batch has been recorded. Intended for TTFB benchmarking.
+     */
+    public long getTimeToFirstBatch() {
+        final boolean batchRecorded = firstBatchReadyForFetchTime > 0;
+        final long elapsedMillis = firstBatchReadyForFetchTime - queryStartTime;
+        return batchRecorded ? elapsedMillis : -1;
+    }
+
+    public boolean isFirstBatchReady() {
+        return firstBatchReadyForFetch; // set to true in consumeStreamResult once resultsReceived reaches the batch size
+    }
+}
@@ -15,7 +15,6 @@
 import org.opensearch.search.SearchPhaseResult;
 import org.opensearch.search.SearchShardTarget;
 import org.opensearch.search.internal.AliasFilter;
-import org.opensearch.search.query.QuerySearchResult;
 import org.opensearch.telemetry.tracing.Tracer;
 import org.opensearch.transport.Transport;
 
@@ -148,25 +147,7 @@ protected void onStreamResult(SearchPhaseResult result, SearchShardIterator shar
      */
     @Override
     protected void onShardResult(SearchPhaseResult result, SearchShardIterator shardIt) {
-        QuerySearchResult queryResult = result.queryResult();
-
-        // For streaming search, if topDocs has already been consumed,
-        // we need to handle this gracefully to avoid the error
-        if (queryResult.hasConsumedTopDocs()) {
-            // This is a streaming result that has already been processed
-            // We can't call the parent's onShardResult because it will try to access topDocs
-            // Instead, we'll just mark this as successful and continue
-            if (getLogger().isDebugEnabled()) {
-                getLogger().debug(
-                    "Skipping onShardResult for already consumed streaming result from shard {}",
-                    queryResult.getShardIndex()
-                );
-            }
-            // Don't call super.onShardResult() to avoid the error
-            return;
-        }
-
-        // For normal cases, call the parent method
+        // Always delegate to the parent to ensure shard accounting and phase transitions.
         super.onShardResult(result, shardIt);
     }
 
 
@@ -104,11 +104,43 @@ protected void onPartialReduceWithTopDocs(
     }
 
     private void collectPartialResponse(SearchResponse partialResponse) {
-        if (responseListener instanceof StreamingSearchResponseListener) {
-            ((StreamingSearchResponseListener) responseListener).onPartialResponse(partialResponse);
-        } else {
-            logger.debug("Partial result computed, listener type: {}", responseListener.getClass().getSimpleName());
+        ActionListener<SearchResponse> target = unwrapListener(responseListener, 3);
+        if (target instanceof StreamingSearchResponseListener) {
+            ((StreamingSearchResponseListener) target).onPartialResponse(partialResponse);
+            return;
+        }
+        logger.debug("Partial result computed, listener type: {}", responseListener.getClass().getSimpleName());
+    }
+
+    /**
+     * Best-effort unwrap of decorating listeners via a reflectively read "delegate" field, so partial
+     * emissions can reach a StreamingSearchResponseListener even when it is wrapped.
+     * @param listener listener to unwrap; returned unchanged when null
+     * @param depth maximum number of wrapper layers to peel off
+     * @return the first StreamingSearchResponseListener met, else the deepest listener reached within depth
+     */
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private ActionListener<SearchResponse> unwrapListener(ActionListener<SearchResponse> listener, int depth) {
+        // Stop at the streaming listener itself: it may hold a "delegate" of its own that must not be returned.
+        if (listener == null || depth <= 0 || listener instanceof StreamingSearchResponseListener) {
+            return listener;
         }
+        for (Class<?> cls = listener.getClass(); cls != null; cls = cls.getSuperclass()) {
+            try {
+                java.lang.reflect.Field delegateField = cls.getDeclaredField("delegate");
+                delegateField.setAccessible(true);
+                Object delegate = delegateField.get(listener);
+                if (delegate instanceof ActionListener) {
+                    return unwrapListener((ActionListener) delegate, depth - 1);
+                }
+                break; // a "delegate" field that is not a listener: nothing further to unwrap
+            } catch (NoSuchFieldException ignored) { // not declared on this class; the loop tries the superclass
+            } catch (IllegalAccessException | RuntimeException e) {
+                // Reflection was refused (access/security/module checks); JVM Errors are deliberately not caught.
+                logger.debug("Failed to unwrap listener: {}", e.toString());
+                break;
+            }
+        }
+        return listener;
     }
 
     @Override
@@ -133,7 +165,7 @@ public void triggerPartialEmission() {
         // Trigger a partial reduce to emit current results
         // This will call onPartialReduceWithTopDocs if there are results to emit
         logger.debug("Triggering partial emission, current emissions: {}", streamEmissions.get());
-        
+
         // For now, just log that we're triggering emission
         // The actual emission will happen when onPartialReduceWithTopDocs is called
         // by the parent class's reduce logic
 
@@ -39,8 +39,8 @@ public StreamingSearchResponseListener(ActionListener<SearchResponse> delegate,
     }
 
     /**
-     * Collect a partial response. Since we can't stream to client,
-     * we collect them for logging and metadata purposes.
+     * Collect a partial response and log a TTFB marker for the first one.
+     * No timestamp is stored here yet; only the first response's hit count is logged.
      */
     public void onPartialResponse(SearchResponse partialResponse) {
         if (isComplete.get()) {
@@ -54,6 +54,14 @@ public void onPartialResponse(SearchResponse partialResponse) {
 
         partialResponses.add(partialResponse);
         logPartialResponse(partialResponse, count);
+
+        // Track TTFB - first partial result delivery time
+        if (count == 1 && partialResponse.getHits() != null) {
+            int numHits = partialResponse.getHits().getHits().length;
+            logger.info("TTFB ACHIEVED: First partial result delivered with {} hits", numHits);
+            // This is where TTFB happens in streaming mode
+            // Store this timestamp if needed for benchmarking
+        }
     }
 
     /**
 
@@ -97,14 +97,12 @@
 import org.opensearch.transport.RemoteClusterAware;
 import org.opensearch.transport.RemoteClusterService;
 import org.opensearch.transport.RemoteTransportException;
-import org.opensearch.transport.StreamTransportService;
 import org.opensearch.transport.Transport;
 import org.opensearch.transport.TransportService;
 import org.opensearch.transport.client.Client;
 import org.opensearch.transport.client.OriginSettingClient;
 import org.opensearch.transport.client.node.NodeClient;
 import org.opensearch.wlm.WorkloadGroupTask;
-import org.opensearch.common.Nullable;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 
@@ -157,14 +157,14 @@
 import org.opensearch.rest.BaseRestHandler;
 import org.opensearch.script.ScriptService;
 import org.opensearch.search.SearchService;
-import org.opensearch.search.streaming.StreamingSearchSettings;
 import org.opensearch.search.aggregations.MultiBucketConsumerService;
 import org.opensearch.search.backpressure.settings.NodeDuressSettings;
 import org.opensearch.search.backpressure.settings.SearchBackpressureSettings;
 import org.opensearch.search.backpressure.settings.SearchShardTaskSettings;
 import org.opensearch.search.backpressure.settings.SearchTaskSettings;
 import org.opensearch.search.fetch.subphase.highlight.FastVectorHighlighter;
 import org.opensearch.search.pipeline.SearchPipelineService;
+import org.opensearch.search.streaming.StreamingSearchSettings;
 import org.opensearch.snapshots.InternalSnapshotsInfoService;
 import org.opensearch.snapshots.SnapshotsService;
 import org.opensearch.tasks.TaskCancellationMonitoringSettings;
 
@@ -37,7 +37,6 @@
 import org.opensearch.action.search.SearchAction;
 import org.opensearch.action.search.SearchContextId;
 import org.opensearch.action.search.SearchRequest;
-import org.opensearch.action.search.StreamSearchAction;
 import org.opensearch.action.support.IndicesOptions;
 import org.opensearch.common.Booleans;
 import org.opensearch.common.util.FeatureFlags;
@@ -231,6 +230,11 @@ public static void parseSearchRequest(
         searchRequest.indicesOptions(IndicesOptions.fromRequest(request, searchRequest.indicesOptions()));
         searchRequest.pipeline(request.param("search_pipeline", searchRequest.source().pipeline()));
 
+        // Add streaming mode support
+        if (request.hasParam("streaming_mode")) {
+            searchRequest.setStreamingSearchMode(request.param("streaming_mode"));
+        }
+
         checkRestTotalHits(request, searchRequest);
         request.paramAsBoolean(INCLUDE_NAMED_QUERIES_SCORE_PARAM, false);
Original file line number	Diff line number	Diff line change
`@@ -32,10 +32,10 @@`
`32`	`32`
`33`	`33`	`package org.opensearch.action.search;`
`34`	`34`
`35`		`-import org.apache.lucene.index.Term;`
`36`		`-import org.apache.lucene.search.CollectionStatistics;`
`37`	`35`	`import org.apache.logging.log4j.LogManager;`
`38`	`36`	`import org.apache.logging.log4j.Logger;`
	`37`	`+import org.apache.lucene.index.Term;`
	`38`	`+import org.apache.lucene.search.CollectionStatistics;`
`39`	`39`	`import org.apache.lucene.search.FieldDoc;`
`40`	`40`	`import org.apache.lucene.search.ScoreDoc;`
`41`	`41`	`import org.apache.lucene.search.Sort;`
`@@ -848,7 +848,6 @@ QueryPhaseResultConsumer newSearchPhaseResults(`
`848`	`848`	`}`
`849`	`849`	`}`
`850`	`850`
`851`		`-`
`852`	`851`	`/**`
`853`	`852`	`* The top docs statistics`
`854`	`853`	`*`
Original file line number	Diff line number	Diff line change
`@@ -679,7 +679,7 @@ public void setStreamingScoring(boolean streamingScoring) {`
`679`	`679`	`public boolean isStreamingScoring() {`
`680`	`680`	`return streamingScoring;`
`681`	`681`	`}`
`682`		`-`
	`682`	`+`
`683`	`683`	`/**`
`684`	`684`	`* Sets the streaming search mode for this request.`
`685`	`685`	`* @param mode The streaming search mode to use`