Skip to content

langchain + xinference: llm.with_structured_output intermittently raises errors under high concurrency #107

@berserker3912

Description

@berserker3912

vllm 0.11.0 xinference 1.17.0 langchain 1.2.6 qwen3-install-30b

class DicInfo(BaseModel):
"""xxxx"""
category: StateType = Field(description="xxxx")
confidence: Optional[float] = Field(
default=0.0,
ge=0.0,
le=1.0,
description="xxxx"
)
detail_categories: Optional[List[str]] = Field(
default_factory=list,
description="xxxx"
)

structured_llm = llm.with_structured_output(DicInfo)
messages = [SystemMessage(content=system_prompt), HumanMessage(content=f"{user_input}")]
final_answer = structured_llm.invoke(messages)

Traceback (most recent call last):
File "/xxxx/xxxx.py", line 130, in classification_execution
final_answer = structured_llm.invoke(messages)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/xxxx/site-packages/langchain_core/runnables/base.py", line 3149, in invoke
input_ = context.run(step.invoke, input_, config, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/xxxx/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 402, in invoke
self.generate_prompt(
File "/xxxx/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 1121, in generate_prompt
return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/xxxx/site-packages/langchain_core/language_models/chat_models.py", line 931, in generate
self._generate_with_cache(
File "/xxxx/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 1225, in _generate_with_cache
result = self._generate(
^^^^^^^^^^^^^^^
File "/xxxx/lib/python3.12/site-packages/langchain_xinference/chat_models.py", line 226, in _generate
final_chunk = self._chat_with_aggregation(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/xxxx/lib/python3.12/site-packages/langchain_xinference/chat_models.py", line 310, in _chat_with_aggregation
for stream_resp in response:
^^^^^^^^
File "/xxxx/lib/python3.12/site-packages/xinference/client/common.py", line 62, in streaming_response_iterator
raise Exception(str(error))
Exception: [address=127.0.0.1:39175, pid=167075] Expecting ',' delimiter: line 3 column 1 (char 120)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions