
How can a template access the streaming tokens generated during its execution? #1539

@xunfeng2zkj

Description

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import time
import json


def create_fundamentals_analyst(llm):
    async def fundamentals_analyst_node(state):
        current_date = state["trade_date"]
        # Assumed to come from the graph state; the original snippet
        # referenced `ticker` without defining it.
        ticker = state["ticker"]

        # `get_fundamentals` is a user-defined tool, defined elsewhere.
        tools = [
            get_fundamentals,
        ]

        # Note: the original had a trailing comma here, which made this a
        # one-element tuple rather than a string.
        system_message = "...."

        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a helpful AI assistant, collaborating with other assistants."
                    " You have access to the following tools: {tool_names}.\n{system_message}"
                    " For your reference, the current date is {current_date}. The company we want to look at is {ticker}.",
                ),
                MessagesPlaceholder(variable_name="messages"),
            ]
        )

        # Pre-fill the static template variables.
        prompt = prompt.partial(system_message=system_message)
        prompt = prompt.partial(tool_names=", ".join([tool.name for tool in tools]))
        prompt = prompt.partial(current_date=current_date)
        prompt = prompt.partial(ticker=ticker)

        chain = prompt | llm.bind_tools(tools)

        # A single awaited call: the node only receives the finished message,
        # never the intermediate tokens.
        result = await chain.ainvoke(state["messages"])

        report = ""
        if len(result.tool_calls) == 0:
            report = result.content

        return {
            "messages": [result],
            "fundamentals_report": report,
        }

    return fundamentals_analyst_node

I'm currently consuming the graph's output like this:

async for chunk in self.graph.astream(init_agent_state, **args):
    if not chunk["messages"]:
        continue
    # Skip tool messages; only surface model output.
    if chunk["messages"][-1].type == "tool":
        continue
    # chunk["messages"][-1].pretty_print()
    trace.append(chunk)
    # Yields the full content of the node's last message, not individual tokens.
    yield chunk["messages"][-1].content

Currently, I'm only able to stream the aggregated output of each LangGraph node, but not the raw token-level stream produced during the model invocation. How can I stream individual tokens from the underlying LLM call during node execution?
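
One way to get token-level output is a minimal sketch like the following, assuming a LangGraph version whose compiled graphs support stream_mode="messages", that `args` does not already set `stream_mode`, and that the node is registered under the placeholder name "fundamentals_analyst":

async for message_chunk, metadata in self.graph.astream(
    init_agent_state, stream_mode="messages", **args
):
    # metadata["langgraph_node"] identifies which node's LLM call produced
    # this chunk; "fundamentals_analyst" is a placeholder for your node name.
    if metadata.get("langgraph_node") == "fundamentals_analyst" and message_chunk.content:
        yield message_chunk.content

Another option with a similar effect is self.graph.astream_events(init_agent_state, version="v2"), filtering for "on_chat_model_stream" events, where the token chunk is available as event["data"]["chunk"].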
