LLMGraphTransformer returns nothing when Using langchain_openai.ChatOpenAi with base_url (openai proxy) #25048
-
Checked other resources
Commit to Help
Example Code

from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0.5,
api_key="sk-*******",
base_url="https://api.oneabc.org/v1"
)
transformer = LLMGraphTransformer(
llm=llm,
allowed_nodes=["Person", "Organization"],
strict_mode=False
)
doc = Document(page_content="Elon Musk is suing OpenAI")
graph_documents = transformer.convert_to_graph_documents([doc])
print(graph_documents)

Description

Problem

When I want to use LLMGraphTransformer, whose llm is ChatOpenAI with a proxy base_url, to extract nodes and relationships, the function returns nothing:

from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0.5,
api_key="sk-*******",
base_url="https://api.oneabc.org/v1"
)
transformer = LLMGraphTransformer(
llm=llm,
allowed_nodes=["Person", "Organization"],
strict_mode=False
)
doc = Document(page_content="Elon Musk is suing OpenAI")
graph_documents = transformer.convert_to_graph_documents([doc])
print(graph_documents)

Output:

def invoke(
self, input: Input, config: Optional[RunnableConfig] = None
) -> Dict[str, Any]:
from langchain_core.callbacks.manager import CallbackManager
.........
# gather results from all steps
try:
# copy to avoid issues from the caller mutating the steps during invoke()
steps = dict(self.steps__)
with get_executor_for_config(config) as executor:
futures = [
executor.submit(
step.invoke,
input,
# mark each step as a child run
patch_config(
config,
callbacks=run_manager.get_child(f"map:key:{key}"),
),
)
for key, step in steps.items()
]
output = {key: future.result() for key, future in zip(steps, futures)}
print(output) #here!!!!!
# finish the root run
except BaseException as e:
run_manager.on_chain_error(e)
raise
else:
run_manager.on_chain_end(output)
print(output) #here!!!!!
        return output

    def process_response(
self, document: Document, config: Optional[RunnableConfig] = None
) -> GraphDocument:
"""
Processes a single document, transforming it into a graph document using
an LLM based on the model's schema and constraints.
"""
text = document.page_content
raw_schema = self.chain.invoke({"input": text}, config=config)
print(f"raw_schema1:{raw_schema}")#here!!!!!
if self._function_call:
raw_schema = cast(Dict[Any, Any], raw_schema)
print(f"raw_schema2:{raw_schema}")#here!!!!!
nodes, relationships = _convert_to_graph_document(raw_schema)
print(f"nodes:{nodes},relationships:{relationships}")#here!!!!! and the code output is here: {'raw': AIMessage(content='```json\n{\n "nodes": [\n {\n "id": "Elon Musk",\n "label": "person"\n },\n {\n "id": "OpenAI",\n "label": "organization"\n }\n ],\n "relationships": [\n {\n "source": "Elon Musk",\n "target": "OpenAI",\n "relationship": "SUING"\n }\n ]\n}\n```', response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 438, 'total_tokens': 526}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ef2a0b21-d9dc-4595-ac1c-84f0ecaacfdf-0', usage_metadata={'input_tokens': 438, 'output_tokens': 88, 'total_tokens': 526})}
{'raw': AIMessage(content='```json\n{\n "nodes": [\n {\n "id": "Elon Musk",\n "label": "person"\n },\n {\n "id": "OpenAI",\n "label": "organization"\n }\n ],\n "relationships": [\n {\n "source": "Elon Musk",\n "target": "OpenAI",\n "relationship": "SUING"\n }\n ]\n}\n```', response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 438, 'total_tokens': 526}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ef2a0b21-d9dc-4595-ac1c-84f0ecaacfdf-0', usage_metadata={'input_tokens': 438, 'output_tokens': 88, 'total_tokens': 526})}
{'parsed': None, 'parsing_error': None}
{'parsed': None, 'parsing_error': None}
raw_schema1:{'raw': AIMessage(content='```json\n{\n "nodes": [\n {\n "id": "Elon Musk",\n "label": "person"\n },\n {\n "id": "OpenAI",\n "label": "organization"\n }\n ],\n "relationships": [\n {\n "source": "Elon Musk",\n "target": "OpenAI",\n "relationship": "SUING"\n }\n ]\n}\n```', response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 438, 'total_tokens': 526}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ef2a0b21-d9dc-4595-ac1c-84f0ecaacfdf-0', usage_metadata={'input_tokens': 438, 'output_tokens': 88, 'total_tokens': 526}), 'parsed': None, 'parsing_error': None}
raw_schema2:{'raw': AIMessage(content='```json\n{\n "nodes": [\n {\n "id": "Elon Musk",\n "label": "person"\n },\n {\n "id": "OpenAI",\n "label": "organization"\n }\n ],\n "relationships": [\n {\n "source": "Elon Musk",\n "target": "OpenAI",\n "relationship": "SUING"\n }\n ]\n}\n```', response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 438, 'total_tokens': 526}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ef2a0b21-d9dc-4595-ac1c-84f0ecaacfdf-0', usage_metadata={'input_tokens': 438, 'output_tokens': 88, 'total_tokens': 526}), 'parsed': None, 'parsing_error': None}
nodes:[],relationships:[]
[GraphDocument(nodes=[], relationships=[], source=Document(page_content='Elon Musk is suing OpenAI'))] You can see the finalResult is empty ,but the gpt did break down nodes label and relationships :( Easier to debug and reproduce the problemI will put a temp APIKEY here (with almost 1$) to make you easier reproduce the problem :): llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0.5,
api_key="sk-qV68spmkAgvNgRh7C0Dd540c3a6e4bE482016317657fA24f",
base_url="https://api.oneabc.org/v1"
)

You can test any model like gpt-3.5-turbo.

Try with Azure OpenAI (extraction succeeds)

I tried using AzureOpenAI to extract entities and relationships for the same sentence, and it succeeded:

from langchain_experimental.graph_transformers import LLMGraphTransformer
import os
from langchain_openai import AzureChatOpenAI
from langchain_core.documents import Document
os.environ["AZURE_OPENAI_API_KEY"] = "*************"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://*********.openai.azure.com"
llm1 = AzureChatOpenAI(
azure_deployment="gpt-35-turbo",
api_version="2024-05-01-preview",
temperature=0.5,
model="gpt-35-turbo",
)
transformer = LLMGraphTransformer(
llm=llm1,
allowed_nodes=["Person", "Organization"],
strict_mode=False
)
doc = Document(page_content="Elon Musk is suing OpenAI")
graph_documents = transformer.convert_to_graph_documents([doc])
print(graph_documents) and the result is below : {'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_NkPw4CwhZ5vwjVHoH0Gs1cFF', 'function': {'arguments': '{"nodes":[{"id":"Elon Musk","type":"Person"},{"id":"OpenAI","type":"Organization"}],"relationships":[{"source_node_id":"Elon Musk","source_node_type":"Person","target_node_id":"OpenAI","target_node_type":"Organization","type":"SUING"}]}', 'name': 'DynamicGraph'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 58, 'prompt_tokens': 681, 'total_tokens': 739}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-08a6e834-c6db-48f6-ab92-662ed031b47f-0', tool_calls=[{'name': 'DynamicGraph', 'args': {'nodes': [{'id': 'Elon Musk', 'type': 'Person'}, {'id': 'OpenAI', 'type': 'Organization'}], 'relationships': [{'source_node_id': 'Elon Musk', 'source_node_type': 'Person', 'target_node_id': 'OpenAI', 'target_node_type': 'Organization', 'type': 'SUING'}]}, 'id': 'call_NkPw4CwhZ5vwjVHoH0Gs1cFF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 681, 'output_tokens': 58, 'total_tokens': 739})}
{'parsed': DynamicGraph(nodes=[SimpleNode(id='Elon Musk', type='Person'), SimpleNode(id='OpenAI', type='Organization')], relationships=[SimpleRelationship(source_node_id='Elon Musk', source_node_type='Person', target_node_id='OpenAI', target_node_type='Organization', type='SUING')]), 'parsing_error': None}
raw_schema1:{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_NkPw4CwhZ5vwjVHoH0Gs1cFF', 'function': {'arguments': '{"nodes":[{"id":"Elon Musk","type":"Person"},{"id":"OpenAI","type":"Organization"}],"relationships":[{"source_node_id":"Elon Musk","source_node_type":"Person","target_node_id":"OpenAI","target_node_type":"Organization","type":"SUING"}]}', 'name': 'DynamicGraph'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 58, 'prompt_tokens': 681, 'total_tokens': 739}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_811936bd4f', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-08a6e834-c6db-48f6-ab92-662ed031b47f-0', tool_calls=[{'name': 'DynamicGraph', 'args': {'nodes': [{'id': 'Elon Musk', 'type': 'Person'}, {'id': 'OpenAI', 'type': 'Organization'}], 'relationships': [{'source_node_id': 'Elon Musk', 'source_node_type': 'Person', 'target_node_id': 'OpenAI', 'target_node_type': 'Organization', 'type': 'SUING'}]}, 'id': 'call_NkPw4CwhZ5vwjVHoH0Gs1cFF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 681, 'output_tokens': 58, 'total_tokens': 739}), 'parsed': DynamicGraph(nodes=[SimpleNode(id='Elon Musk', type='Person'), SimpleNode(id='OpenAI', type='Organization')], relationships=[SimpleRelationship(source_node_id='Elon Musk', source_node_type='Person', target_node_id='OpenAI', target_node_type='Organization', type='SUING')]), 'parsing_error': None}
[GraphDocument(nodes=[Node(id='Elon Musk', type='Person'), Node(id='Openai', type='Organization')], relationships=[Relationship(source=Node(id='Elon Musk', type='Person'), target=Node(id='Openai', type='Organization'), type='SUING')], source=Document(page_content='Elon Musk is suing OpenAI'))] You can see some noticeable differences in the debug statement output from before System InfoSystem Information
------------------
> OS: Windows
> OS Version: 10.0.19045
> Python Version: 3.12.4 (tags/v3.12.4:8e8a4ba, Jun 6 2024, 19:30:16) [MSC v.1940 64 bit (AMD64)]
Package Information
-------------------
> langchain_core: 0.2.28
> langchain: 0.2.11
> langchain_community: 0.2.10
> langsmith: 0.1.94
> langchain_anthropic: 0.1.22
> langchain_experimental: 0.0.64
> langchain_google_genai: 1.0.8
> langchain_openai: 0.1.19
> langchain_text_splitters: 0.2.2
> langchainhub: 0.1.20
Packages not installed (Not Necessarily a Problem)
--------------------------------------------------
The following packages were not found:
> langgraph
> langserve
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 2 replies
-
Hello @RidiculousBuffal! How can I assist you today?

The issue you're encountering with ChatOpenAI behind a proxy base_url may stem from misconfigured proxy settings. Here are some steps to ensure proper configuration:
Here is a relevant part of the code that shows how these values are resolved:

values["openai_api_base"] = values["openai_api_base"] or os.getenv("OPENAI_API_BASE")
values["openai_proxy"] = get_from_dict_or_env(
values,
"openai_proxy",
"OPENAI_PROXY",
default="",
)

If these values are not set correctly, the API requests might fail silently or not reach the intended endpoint, resulting in no response from the model.

Additionally, the class is initialized as follows:

class LLMGraphTransformer:
def __init__(
self,
llm: BaseLanguageModel,
allowed_nodes: List[str] = [],
allowed_relationships: List[str] = [],
prompt: Optional[ChatPromptTemplate] = None,
strict_mode: bool = True,
node_properties: Union[bool, List[str]] = False,
relationship_properties: Union[bool, List[str]] = False,
) -> None:
self.allowed_nodes = allowed_nodes
self.allowed_relationships = allowed_relationships
self.strict_mode = strict_mode
self._function_call = True
# Check if the LLM really supports structured output
try:
llm.with_structured_output(_Graph)
except NotImplementedError:
self._function_call = False
if not self._function_call:
if node_properties or relationship_properties:
raise ValueError(
"The 'node_properties' and 'relationship_properties' parameters "
"cannot be used in combination with a LLM that doesn't support "
"native function calling."
                )

Ensure that the LLM you pass supports native function calling if you use these parameters.
Beta Was this translation helpful? Give feedback.
-
😣 This solution may not be the best solution.

A simple solution

Notice that @dosu mentions the function with_structured_output in the init function. I found that my ChatOpenAI always passes the structured_output test, but the parsed output is empty. So I noticed immediately afterward that if I set self._function_call = False, the program takes the other logic path in process_response, which can parse the raw JSON into nodes and relationships. 👍

Details

I have changed the init function to support a ChatOpenAI model with a proxy base_url, which LLMGraphTransformer does not handle well:

def __init__(
self,
llm: BaseLanguageModel,
allowed_nodes: List[str] = [],
allowed_relationships: List[str] = [],
prompt: Optional[ChatPromptTemplate] = None,
strict_mode: bool = True,
node_properties: Union[bool, List[str]] = False,
relationship_properties: Union[bool, List[str]] = False,
) -> None:
self.allowed_nodes = allowed_nodes
self.allowed_relationships = allowed_relationships
self.strict_mode = strict_mode
self._function_call = True
# Check if the LLM really supports structured output
#change here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
try:
if llm.model_name.find('gpt')!=-1 :
self._function_call = False
else:
            llm.with_structured_output(_Graph)

And to solve some exceptions, I also modified process_response:

def process_response(
self, document: Document, config: Optional[RunnableConfig] = None
) -> GraphDocument:
text = document.page_content
raw_schema = self.chain.invoke({"input": text}, config=config)
print(f"raw_schema1:{raw_schema}")
if self._function_call:
...........
else:
nodes_set = set()
relationships = []
if not isinstance(raw_schema, str):
raw_schema = raw_schema.content
parsed_json = self.json_repair.loads(raw_schema)
#somechange here!!!!
if type(parsed_json)==dict:
parsed_json = [parsed_json['properties']]
for rel in parsed_json:
        ..........

Also, you should install the json_repair lib:

pip install json_repair

Result

Finally, the output is the same as the AzureOpenAI output:

from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
llm = ChatOpenAI(
model="gpt-4o-mini-2024-07-18",
temperature=0.5,
api_key="sk-***",
base_url="https://api.oneabc.org/v1"
)
transformer = LLMGraphTransformer(
llm=llm,
allowed_nodes=["Person", "Organization"],
strict_mode=False
)
doc = Document(page_content="Elon Musk is suing OpenAI")
graph_documents = transformer.convert_to_graph_documents([doc])
print(graph_documents) output: raw_schema1:content='{"properties":{"head":"Elon Musk","head_type":"Person","relation":"SUING","tail":"OpenAI","tail_type":"Organization"}}' response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 966, 'total_tokens': 995}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-1d9e5656-5756-4cf5-a593-2cb2860e7caa-0' usage_metadata={'input_tokens': 966, 'output_tokens': 29, 'total_tokens': 995}
{'properties': {'head': 'Elon Musk', 'head_type': 'Person', 'relation': 'SUING', 'tail': 'OpenAI', 'tail_type': 'Organization'}}
{'properties': {'head': 'Elon Musk', 'head_type': 'Person', 'relation': 'SUING', 'tail': 'OpenAI', 'tail_type': 'Organization'}}
[GraphDocument(nodes=[Node(id='Elon Musk', type='Person'), Node(id='OpenAI', type='Organization')], relationships=[Relationship(source=Node(id='Elon Musk', type='Person'), target=Node(id='OpenAI', type='Organization'), type='SUING')], source=Document(page_content='Elon Musk is suing OpenAI'))] |
Beta Was this translation helpful? Give feedback.
-
With langchain-experimental==0.3.2, you can try turning ignore_tool_usage on when creating an LLMGraphTransformer object.
|
Beta Was this translation helpful? Give feedback.
😣 This solution may not be the best solution.
A simple solution
Notice that @dosu mentions the function with_structured_output in the init function. I found that my ChatOpenAI always passes the structured_output test, but the output is empty. So I noticed immediately afterward that if I set
self._function_call = False
the program will follow the other logic path in process_response, which can parse the raw JSON into nodes and relationships 👍

Details
I have changed the init func in llm to support the
ChatOpenAI
model with a proxy base_url, which is not handled well in LLMGraphTransformer.