Skip to content

Commit 6b3d406

Browse files
Implement stop streaming button in v3 (#1351)
* Add handler to stop message streaming * Add the stop streaming button in message footer * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Set bot user explicitly, and listen only to messages written by a bot to stop streaming * lint * Update jupyterlab-chat dependency * lint * Fix assertion error --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent dd21873 commit 6b3d406

File tree

10 files changed

+204
-18
lines changed

10 files changed

+204
-18
lines changed

packages/jupyter-ai/jupyter_ai/chat_handlers/base.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,19 +452,46 @@ async def stream_reply(
452452
chunk_generator = self.llm_chain.astream(input, config=merged_config)
453453
# TODO v3: re-implement stream interrupt
454454
stream_interrupted = False
455+
stream_id = None
456+
received_first_chunk = False
455457
async for chunk in chunk_generator:
458+
if (
459+
stream_id
460+
and stream_id in self.message_interrupted.keys()
461+
and self.message_interrupted[stream_id].is_set()
462+
):
463+
try:
464+
# notify the model provider that streaming was interrupted
465+
# (this is essential to allow the model to stop generating)
466+
await chunk_generator.athrow( # type:ignore[attr-defined]
467+
GenerationInterrupted()
468+
)
469+
except GenerationInterrupted:
470+
# do not let the exception bubble up in case if
471+
# the provider did not handle it
472+
pass
473+
stream_interrupted = True
474+
break
475+
456476
if isinstance(chunk, AIMessageChunk) and isinstance(chunk.content, str):
457-
reply_stream.write(chunk.content)
477+
stream_id = reply_stream.write(chunk.content)
458478
elif isinstance(chunk, str):
459-
reply_stream.write(chunk)
479+
stream_id = reply_stream.write(chunk)
460480
else:
461481
self.log.error(f"Unrecognized type of chunk yielded: {type(chunk)}")
462482
break
463483

484+
if not received_first_chunk:
485+
# when receiving the first chunk, start the stream.
486+
received_first_chunk = True
487+
self.message_interrupted[stream_id] = asyncio.Event()
488+
464489
# if stream was interrupted, add a tombstone
465490
if stream_interrupted:
466491
stream_tombstone = "\n\n(AI response stopped by user)"
467492
reply_stream.write(stream_tombstone)
493+
if stream_id and stream_id in self.message_interrupted.keys():
494+
del self.message_interrupted[stream_id]
468495

469496

470497
class GenerationInterrupted(asyncio.CancelledError):

packages/jupyter-ai/jupyter_ai/extension.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
AutocompleteOptionsHandler,
2929
EmbeddingsModelProviderHandler,
3030
GlobalConfigHandler,
31+
InterruptStreamingHandler,
3132
ModelProviderHandler,
3233
SlashCommandsInfoHandler,
3334
)
@@ -77,6 +78,7 @@ class AiExtension(ExtensionApp):
7778
(r"api/ai/config/?", GlobalConfigHandler),
7879
(r"api/ai/chats/slash_commands?", SlashCommandsInfoHandler),
7980
(r"api/ai/chats/autocomplete_options?", AutocompleteOptionsHandler),
81+
(r"api/ai/chats/stop_streaming?", InterruptStreamingHandler),
8082
(r"api/ai/providers?", ModelProviderHandler),
8183
(r"api/ai/providers/embeddings?", EmbeddingsModelProviderHandler),
8284
(r"api/ai/completion/inline/?", DefaultInlineCompletionHandler),
@@ -625,17 +627,23 @@ def _init_persona_manager(self, ychat: YChat) -> Optional[PersonaManager]:
625627
This method should not raise an exception. Upon encountering an
626628
exception, this method will catch it, log it, and return `None`.
627629
"""
628-
persona_manager: Optional[PersonaManager]
630+
persona_manager: Optional[PersonaManager] = None
629631

630632
try:
631633
config_manager = self.settings.get("jai_config_manager", None)
632634
assert config_manager and isinstance(config_manager, ConfigManager)
633635

636+
message_interrupted = self.settings.get("jai_message_interrupted", None)
637+
assert message_interrupted is not None and isinstance(
638+
message_interrupted, dict
639+
)
640+
634641
persona_manager = PersonaManager(
635642
ychat=ychat,
636643
config_manager=config_manager,
637644
event_loop=self.event_loop,
638645
log=self.log,
646+
message_interrupted=message_interrupted,
639647
)
640648
except Exception as e:
641649
# TODO: how to stop the extension when this fails

packages/jupyter-ai/jupyter_ai/handlers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,17 @@ def delete(self, api_key_name: str):
204204
raise HTTPError(500, str(e))
205205

206206

207+
class InterruptStreamingHandler(BaseAPIHandler):
208+
"""Interrupt a current message streaming"""
209+
210+
@web.authenticated
211+
def post(self):
212+
message_id = self.get_json_body().get("message_id")
213+
message_interrupted = self.settings.get("jai_message_interrupted")
214+
if message_id and message_id in message_interrupted.keys():
215+
message_interrupted[message_id].set()
216+
217+
207218
class SlashCommandsInfoHandler(BaseAPIHandler):
208219
"""List slash commands that are currently available to the user."""
209220

packages/jupyter-ai/jupyter_ai/personas/base_persona.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
from abc import ABC, abstractmethod
23
from dataclasses import asdict
34
from logging import Logger
@@ -82,6 +83,10 @@ class BasePersona(ABC):
8283
Automatically set by `BasePersona`.
8384
"""
8485

86+
message_interrupted: dict[str, asyncio.Event]
87+
"""Dictionary mapping an agent message identifier to an asyncio Event
88+
which indicates if the message generation/streaming was interrupted."""
89+
8590
################################################
8691
# constructor
8792
################################################
@@ -92,11 +97,13 @@ def __init__(
9297
manager: "PersonaManager",
9398
config: ConfigManager,
9499
log: Logger,
100+
message_interrupted: dict[str, asyncio.Event],
95101
):
96102
self.ychat = ychat
97103
self.manager = manager
98104
self.config = config
99105
self.log = log
106+
self.message_interrupted = message_interrupted
100107
self.awareness = PersonaAwareness(
101108
ychat=self.ychat, log=self.log, user=self.as_user()
102109
)
@@ -221,14 +228,34 @@ async def stream_message(self, reply_stream: "AsyncIterator") -> None:
221228
- Automatically manages its awareness state to show writing status.
222229
"""
223230
stream_id: Optional[str] = None
224-
231+
stream_interrupted = False
225232
try:
226233
self.awareness.set_local_state_field("isWriting", True)
227234
async for chunk in reply_stream:
235+
if (
236+
stream_id
237+
and stream_id in self.message_interrupted.keys()
238+
and self.message_interrupted[stream_id].is_set()
239+
):
240+
try:
241+
# notify the model provider that streaming was interrupted
242+
# (this is essential to allow the model to stop generating)
243+
await reply_stream.athrow( # type:ignore[attr-defined]
244+
GenerationInterrupted()
245+
)
246+
except GenerationInterrupted:
247+
# do not let the exception bubble up in case if
248+
# the provider did not handle it
249+
pass
250+
stream_interrupted = True
251+
break
252+
228253
if not stream_id:
229254
stream_id = self.ychat.add_message(
230255
NewMessage(body="", sender=self.id)
231256
)
257+
self.message_interrupted[stream_id] = asyncio.Event()
258+
self.awareness.set_local_state_field("isWriting", stream_id)
232259

233260
assert stream_id
234261
self.ychat.update_message(
@@ -248,9 +275,29 @@ async def stream_message(self, reply_stream: "AsyncIterator") -> None:
248275
self.log.exception(e)
249276
finally:
250277
self.awareness.set_local_state_field("isWriting", False)
278+
if stream_id:
279+
# if stream was interrupted, add a tombstone
280+
if stream_interrupted:
281+
stream_tombstone = "\n\n(AI response stopped by user)"
282+
self.ychat.update_message(
283+
Message(
284+
id=stream_id,
285+
body=stream_tombstone,
286+
time=time(),
287+
sender=self.id,
288+
raw_time=False,
289+
),
290+
append=True,
291+
)
292+
if stream_id in self.message_interrupted.keys():
293+
del self.message_interrupted[stream_id]
251294

252295
def send_message(self, body: str) -> None:
253296
"""
254297
Sends a new message to the chat from this persona.
255298
"""
256299
self.ychat.add_message(NewMessage(body=body, sender=self.id))
300+
301+
302+
class GenerationInterrupted(asyncio.CancelledError):
303+
"""Exception raised when streaming is cancelled by the user"""

packages/jupyter-ai/jupyter_ai/personas/persona_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
from logging import Logger
23
from time import time_ns
34
from typing import TYPE_CHECKING, ClassVar, Optional
@@ -37,11 +38,13 @@ def __init__(
3738
config_manager: ConfigManager,
3839
event_loop: "AbstractEventLoop",
3940
log: Logger,
41+
message_interrupted: dict[str, asyncio.Event],
4042
):
4143
self.ychat = ychat
4244
self.config_manager = config_manager
4345
self.event_loop = event_loop
4446
self.log = log
47+
self.message_interrupted = message_interrupted
4548

4649
if not isinstance(PersonaManager._persona_classes, list):
4750
self._init_persona_classes()
@@ -125,6 +128,7 @@ def _init_personas(self) -> dict[str, BasePersona]:
125128
manager=self,
126129
config=self.config_manager,
127130
log=self.log,
131+
message_interrupted=self.message_interrupted,
128132
)
129133
except Exception:
130134
self.log.exception(

packages/jupyter-ai/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
"@emotion/react": "^11.10.5",
6363
"@emotion/styled": "^11.10.5",
6464
"@jupyter-notebook/application": "^7.2.0",
65-
"@jupyter/chat": "^0.11.0",
65+
"@jupyter/chat": "^0.12.0",
6666
"@jupyterlab/application": "^4.2.0",
6767
"@jupyterlab/apputils": "^4.2.0",
6868
"@jupyterlab/codeeditor": "^4.2.0",

packages/jupyter-ai/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
# traitlets>=5.6 is required in JL4
3838
"traitlets>=5.6",
3939
"deepmerge>=2.0,<3",
40-
"jupyterlab-chat>=0.11.0,<0.12.0",
40+
"jupyterlab-chat>=0.12.0,<0.13.0",
4141
]
4242

4343
dynamic = ["version", "description", "authors", "urls", "keywords"]
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import {
2+
IChatModel,
3+
MessageFooterSectionProps,
4+
TooltippedButton
5+
} from '@jupyter/chat';
6+
import StopIcon from '@mui/icons-material/Stop';
7+
import React, { useEffect, useState } from 'react';
8+
import { requestAPI } from '../../handler';
9+
10+
/**
11+
* The stop button.
12+
*/
13+
export function StopButton(props: MessageFooterSectionProps): JSX.Element {
14+
const { message, model } = props;
15+
const [visible, setVisible] = useState<boolean>(false);
16+
const tooltip = 'Stop streaming';
17+
18+
useEffect(() => {
19+
const writerChanged = (_: IChatModel, writers: IChatModel.IWriter[]) => {
20+
const w = writers.filter(w => w.messageID === message.id);
21+
if (w.length > 0) {
22+
setVisible(true);
23+
} else {
24+
setVisible(false);
25+
}
26+
};
27+
28+
// Listen only the messages that are from a bot.
29+
if (
30+
message.sender.username !== model.user?.username &&
31+
message.sender.bot
32+
) {
33+
model.writersChanged?.connect(writerChanged);
34+
35+
// Check if the message is currently being edited.
36+
writerChanged(model, model.writers);
37+
}
38+
39+
return () => {
40+
model.writersChanged?.disconnect(writerChanged);
41+
};
42+
}, [model]);
43+
44+
const onClick = () => {
45+
// Post request to the stop streaming handler.
46+
requestAPI('chats/stop_streaming', {
47+
method: 'POST',
48+
body: JSON.stringify({
49+
message_id: message.id
50+
}),
51+
headers: {
52+
'Content-Type': 'application/json'
53+
}
54+
});
55+
};
56+
57+
return visible ? (
58+
<TooltippedButton
59+
onClick={onClick}
60+
tooltip={tooltip}
61+
buttonProps={{
62+
size: 'small',
63+
variant: 'contained',
64+
title: tooltip
65+
}}
66+
sx={{ display: visible ? 'inline-flex' : 'none' }}
67+
>
68+
<StopIcon />
69+
</TooltippedButton>
70+
) : (
71+
<></>
72+
);
73+
}

packages/jupyter-ai/src/index.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { INotebookShell } from '@jupyter-notebook/application';
2+
import { IMessageFooterRegistry } from '@jupyter/chat';
23
import {
34
JupyterFrontEnd,
45
JupyterFrontEndPlugin
@@ -16,6 +17,7 @@ import { SingletonLayout, Widget } from '@lumino/widgets';
1617

1718
import { chatCommandPlugins } from './chat-commands';
1819
import { completionPlugin } from './completions';
20+
import { StopButton } from './components/message-footer/stop-button';
1921
import { statusItemPlugin } from './status';
2022
import { IJaiCompletionProvider } from './tokens';
2123
import { buildErrorWidget } from './widgets/chat-error';
@@ -104,10 +106,23 @@ const plugin: JupyterFrontEndPlugin<void> = {
104106
}
105107
};
106108

109+
const stopStreaming: JupyterFrontEndPlugin<void> = {
110+
id: '@jupyter-ai/core:stop-streaming',
111+
autoStart: true,
112+
requires: [IMessageFooterRegistry],
113+
activate: (app: JupyterFrontEnd, registry: IMessageFooterRegistry) => {
114+
registry.addSection({
115+
component: StopButton,
116+
position: 'center'
117+
});
118+
}
119+
};
120+
107121
export default [
108122
plugin,
109123
statusItemPlugin,
110124
completionPlugin,
125+
stopStreaming,
111126
...chatCommandPlugins
112127
];
113128

0 commit comments

Comments
 (0)