
Commit c23b7d7

Added native SDK async method for ChatWatsonx (#76)

* Added native SDK async method for ChatWatsonx
* Fixed unit tests
* Fixed integration tests
* Added integration tests
* Added _astream method to the LLM

1 parent 5b9a79b commit c23b7d7
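For context, a minimal usage sketch of what this commit enables: the standard LangChain async entry points on ChatWatsonx (ainvoke, astream) now route through the native watsonx.ai SDK async calls added below instead of a sync fallback. The model_id, url, and project_id values are placeholders, and credentials are assumed to come from environment variables (e.g. WATSONX_APIKEY).

import asyncio

from langchain_ibm import ChatWatsonx


async def main() -> None:
    # Placeholder model/endpoint/project; substitute real values.
    chat = ChatWatsonx(
        model_id="ibm/granite-3-8b-instruct",
        url="https://us-south.ml.cloud.ibm.com",
        project_id="YOUR_PROJECT_ID",
    )

    # ainvoke -> _agenerate -> watsonx_model.achat
    reply = await chat.ainvoke("Say hello in one word.")
    print(reply.content)

    # astream -> _astream -> watsonx_model.achat_stream
    async for chunk in chat.astream("Count to three."):
        print(chunk.content, end="", flush=True)


asyncio.run(main())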

File tree

11 files changed: +444 -244 lines changed

libs/ibm/langchain_ibm/chat_models.py

Lines changed: 83 additions & 1 deletion
@@ -6,6 +6,7 @@
 from operator import itemgetter
 from typing import (
     Any,
+    AsyncIterator,
     Callable,
     Dict,
     Iterator,
@@ -27,11 +28,15 @@
     BaseSchema,
     TextChatParameters,
 )
-from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
 from langchain_core.language_models import LanguageModelInput
 from langchain_core.language_models.chat_models import (
     BaseChatModel,
     LangSmithParams,
+    agenerate_from_stream,
     generate_from_stream,
 )
 from langchain_core.messages import (
@@ -718,6 +723,27 @@ def _generate(
         )
         return self._create_chat_result(response)
 
+    async def _agenerate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        if self.streaming:
+            stream_iter = self._astream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+            return await agenerate_from_stream(stream_iter)
+
+        message_dicts, params = self._create_message_dicts(messages, stop, **kwargs)
+        updated_params = self._merge_params(params, kwargs)
+
+        response = await self.watsonx_model.achat(
+            messages=message_dicts, **(kwargs | {"params": updated_params})
+        )
+        return self._create_chat_result(response)
+
     def _stream(
         self,
         messages: List[BaseMessage],
@@ -768,6 +794,62 @@ def _stream(
 
             yield generation_chunk
 
+    async def _astream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        message_dicts, params = self._create_message_dicts(messages, stop, **kwargs)
+        updated_params = self._merge_params(params, kwargs)
+
+        default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
+
+        is_first_tool_chunk = True
+        _prompt_tokens_included = False
+
+        response = await self.watsonx_model.achat_stream(
+            messages=message_dicts, **(kwargs | {"params": updated_params})
+        )
+        async for chunk in response:
+            if not isinstance(chunk, dict):
+                chunk = chunk.model_dump()
+            generation_chunk = _convert_chunk_to_generation_chunk(
+                chunk,
+                default_chunk_class,
+                is_first_tool_chunk,
+                _prompt_tokens_included,
+            )
+            if generation_chunk is None:
+                continue
+
+            if (
+                hasattr(generation_chunk.message, "usage_metadata")
+                and generation_chunk.message.usage_metadata
+            ):
+                _prompt_tokens_included = True
+            default_chunk_class = generation_chunk.message.__class__
+            logprobs = (generation_chunk.generation_info or {}).get("logprobs")
+            if run_manager:
+                await run_manager.on_llm_new_token(
+                    generation_chunk.text,
+                    chunk=generation_chunk,
+                    logprobs=logprobs,
+                )
+            if hasattr(generation_chunk.message, "tool_calls") and isinstance(
+                generation_chunk.message.tool_calls, list
+            ):
+                first_tool_call = (
+                    generation_chunk.message.tool_calls[0]
+                    if generation_chunk.message.tool_calls
+                    else None
+                )
+                if isinstance(first_tool_call, dict) and first_tool_call.get("name"):
+                    is_first_tool_chunk = False
+
+            yield generation_chunk
+
     @staticmethod
     def _merge_params(params: dict, kwargs: dict) -> dict:
         param_updates = {}
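A hedged sketch of how callers consume the new _astream: each yielded item is a message chunk built from watsonx_model.achat_stream, and with streaming=True the new _agenerate aggregates these same chunks via agenerate_from_stream. The helper below is illustrative only; usage_metadata is read defensively because only the chunk carrying token counts populates it.

from langchain_ibm import ChatWatsonx


async def stream_reply(chat: ChatWatsonx, prompt: str) -> None:
    # chat.astream drives ChatWatsonx._astream under the hood.
    async for chunk in chat.astream(prompt):
        print(chunk.content, end="", flush=True)
        # Token accounting, when present, arrives on the chunk's usage_metadata.
        usage = getattr(chunk, "usage_metadata", None)
        if usage:
            print(f"\n[usage: {usage}]")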

libs/ibm/langchain_ibm/llms.py

Lines changed: 31 additions & 1 deletion
@@ -1,7 +1,17 @@
 from __future__ import annotations
 
 import logging
-from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple, Union
+from typing import (
+    Any,
+    AsyncIterator,
+    Dict,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+)
 
 from ibm_watsonx_ai import APIClient, Credentials  # type: ignore
 from ibm_watsonx_ai.foundation_models import Model, ModelInference  # type: ignore
@@ -524,6 +534,26 @@ def _stream(
                 run_manager.on_llm_new_token(chunk.text, chunk=chunk)
             yield chunk
 
+    async def _astream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[GenerationChunk]:
+        params, kwargs = self._get_chat_params(stop=stop, **kwargs)
+        params = self._validate_chat_params(params)
+        async for stream_resp in await self.watsonx_model.agenerate_stream(
+            prompt=prompt, params=params
+        ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
+            chunk = self._stream_response_to_generation_chunk(stream_resp)
+
+            if run_manager:
+                await run_manager.on_llm_new_token(chunk.text, chunk=chunk)
+            yield chunk
+
     def get_num_tokens(self, text: str) -> int:
         response = self.watsonx_model.tokenize(text, return_tokens=False)
         return response["result"]["token_count"]
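Similarly, a minimal sketch of the LLM-level async streaming that the new WatsonxLLM._astream provides; model_id, url, and project_id are placeholders, and credentials are assumed to come from the environment.

import asyncio

from langchain_ibm import WatsonxLLM


async def main() -> None:
    llm = WatsonxLLM(
        model_id="ibm/granite-3-8b-instruct",     # placeholder
        url="https://us-south.ml.cloud.ibm.com",  # placeholder endpoint
        project_id="YOUR_PROJECT_ID",             # placeholder
    )
    # llm.astream now feeds from watsonx_model.agenerate_stream natively,
    # so no sync-to-async executor fallback is needed for streaming.
    async for token in llm.astream("Explain event loops in one sentence."):
        print(token, end="", flush=True)


asyncio.run(main())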

0 commit comments