Skip to content

Commit 9bfe271

Browse files
INTPYTHON-629 Take 2 - Encouraged LLM to find as many entities and relationships as it could (#148)
1 parent b136893 commit 9bfe271

File tree

3 files changed

+11
-32
lines changed

3 files changed

+11
-32
lines changed

libs/langchain-mongodb/langchain_mongodb/graphrag/graph.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def __init__(
9898
entity_extraction_model: BaseChatModel,
9999
entity_prompt: Optional[ChatPromptTemplate] = None,
100100
query_prompt: Optional[ChatPromptTemplate] = None,
101-
max_depth: int = 2,
101+
max_depth: int = 3,
102102
allowed_entity_types: Optional[List[str]] = None,
103103
allowed_relationship_types: Optional[List[str]] = None,
104104
entity_examples: Optional[str] = None,
@@ -235,7 +235,7 @@ def from_connection_string(
235235
entity_extraction_model: BaseChatModel,
236236
entity_prompt: ChatPromptTemplate = prompts.entity_prompt,
237237
query_prompt: ChatPromptTemplate = prompts.query_prompt,
238-
max_depth: int = 2,
238+
max_depth: int = 3,
239239
allowed_entity_types: Optional[List[str]] = None,
240240
allowed_relationship_types: Optional[List[str]] = None,
241241
entity_examples: Optional[str] = None,

libs/langchain-mongodb/langchain_mongodb/graphrag/prompts.py

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6,17 +6,9 @@
66

77
ENTITY_EXTRACTION_INSTRUCTIONS = """
88
## Overview
9-
You are a meticulous analyst tasked with extracting information from unstructured text
9+
You are a meticulous analyst tasked with identifying potential entities from unstructured text
1010
to build a knowledge graph in a structured json format of entities (nodes) and their relationships (edges).
11-
The graph will be stored in a MongoDB Collection and traversed using $graphLookup
12-
from starting points of entity nodes matching names found in a query, and follow their relationships.
13-
14-
Use the following as guidelines.
15-
16-
- Simplicity: The graph should have as few entities and relationship types as needed to convey the information in the input.
17-
- Consistency: Connections can only be made if entities and relationships use consistent naming.
18-
- Generality: The graph should be useful for describing the concepts in not just this document but other similar documents.
19-
- Accuracy: Do not add any information that is not explicitly mentioned in the text.
11+
**Include as many entities and relationships as you can.**
2012
2113
INPUT: You will be provided a text document.
2214
OUTPUT:
@@ -84,21 +76,19 @@
8476

8577

8678
NAME_EXTRACTION_INSTRUCTIONS = """
87-
You are a meticulous analyst tasked with extracting information from documents to form
88-
knowledge graphs of entities (nodes) and their relationships (edges).
89-
90-
You will be provided a short document (query) from which you infer the entity names.
91-
You need not think about relationships between the entities. You only need names.
79+
You are an analyst tasked with identifying potential entities in text documents.
80+
You will be provided a short document from which you infer entity names.
81+
Identify as many as possible.
9282
9383
Provide your response as a valid JSON Array of entity names
9484
or human-readable identifiers, found in the text.
9585
9686
**Allowed Entity Types**:
97-
- Extract ONLY entities whose `type` matches one of the following: {allowed_entity_types}.
98-
- NOTE: If this list is empty, ANY `type` is permitted.
87+
- By default, all types are permitted.
88+
- If a non-empty list is provided, extract ONLY entities whose `type` matches one of the following: [{allowed_entity_types}].
9989
10090
### Examples of Exclusions:
101-
- If `allowed_entity_types` is `["Person", "Organization"]`, and the text mentions "Event" or "Location",
91+
- If `allowed_entity_types` is `["Person", "Organization"]`, and the text mentions an "Event" or "Location",
10292
these entities must **NOT** be included in the output.
10393
10494
## Examples:

libs/langchain-mongodb/tests/integration_tests/test_graphrag.py

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from typing import Generator
55

66
import pytest
7-
from flaky import flaky # type:ignore[import-untyped]
87
from langchain_core.documents import Document
98
from langchain_core.language_models.chat_models import BaseChatModel
109
from langchain_core.messages import AIMessage
@@ -69,7 +68,7 @@ def documents():
6968
GreenTech Ltd. Leads SolarGrid Initiative
7069
7170
San Francisco, CA — GreenTech Ltd. has emerged as a leader in renewable energy projects with the SolarGrid Initiative,
72-
a collaboration with ACME Corporation. Jane Smith, the SolarGrid project’s Lead Manager, highlighted its ambitious goal:
71+
a collaboration with ACME Corporation. GreenTech's Jane Smith, the SolarGrid project’s Lead Manager, highlighted its ambitious goal:
7372
providing affordable solar energy to underserved communities.
7473
7574
GreenTech, headquartered in San Francisco, has worked closely with ACME since their partnership began in May 2021.
@@ -125,8 +124,6 @@ def graph_store(collection, entity_extraction_model, documents) -> MongoDBGraphS
125124
store = MongoDBGraphStore(
126125
collection=collection,
127126
entity_extraction_model=entity_extraction_model,
128-
entity_prompt=entity_prompt,
129-
query_prompt=query_prompt,
130127
)
131128
bulkwrite_results = store.add_documents(documents)
132129
assert len(bulkwrite_results) == len(documents)
@@ -145,7 +142,6 @@ def test_add_docs_store(graph_store):
145142
assert 4 <= len(extracted_entities) < 8
146143

147144

148-
@flaky(max_runs=3, min_passes=2)
149145
def test_extract_entity_names(graph_store, query_connection):
150146
query_entity_names = graph_store.extract_entity_names(query_connection)
151147
assert set(query_entity_names) == {"John Doe", "Jane Smith"}
@@ -157,7 +153,6 @@ def test_extract_entities_from_empty_string_names(graph_store):
157153
assert len(no_names) == 0
158154

159155

160-
@flaky(max_runs=3, min_passes=1)
161156
def test_related_entities(graph_store):
162157
entity_names = ["John Doe", "Jane Smith"]
163158
related_entities = graph_store.related_entities(entity_names)
@@ -168,7 +163,6 @@ def test_related_entities(graph_store):
168163
assert len(no_entities) == 0
169164

170165

171-
@flaky(max_runs=3, min_passes=1)
172166
def test_additional_entity_examples(entity_extraction_model, entity_example, documents):
173167
# First, create one client just to drop any existing collections
174168
client = MongoClient(CONNECTION_STRING)
@@ -192,15 +186,13 @@ def test_additional_entity_examples(entity_extraction_model, entity_example, doc
192186
assert len(new_entities) >= 2
193187

194188

195-
@flaky(max_runs=3, min_passes=1)
196189
def test_chat_response(graph_store, query_connection):
197190
"""Displays querying an existing Knowledge Graph Database"""
198191
answer = graph_store.chat_response(query_connection)
199192
assert isinstance(answer, AIMessage)
200193
assert "acme corporation" in answer.content.lower()
201194

202195

203-
@flaky(max_runs=3, min_passes=1)
204196
def test_similarity_search(graph_store, query_connection):
205197
docs = graph_store.similarity_search(query_connection)
206198
assert len(docs) >= 4
@@ -209,7 +201,6 @@ def test_similarity_search(graph_store, query_connection):
209201
assert any("attributes" in d.keys() for d in docs)
210202

211203

212-
@flaky(max_runs=3, min_passes=1)
213204
def test_validator(documents, entity_extraction_model):
214205
# Case 1. No existing collection.
215206
client = MongoClient(CONNECTION_STRING)
@@ -263,7 +254,6 @@ def test_validator(documents, entity_extraction_model):
263254
client.close()
264255

265256

266-
@flaky(max_runs=3, min_passes=1)
267257
def test_allowed_entity_types(documents, entity_extraction_model):
268258
"""Add allowed_entity_types. Use the validator to confirm behaviour."""
269259
allowed_entity_types = ["Person"]
@@ -291,7 +281,6 @@ def test_allowed_entity_types(documents, entity_extraction_model):
291281
all([len(e["relationships"].get("attributes", [])) == 0 for e in entities])
292282

293283

294-
@flaky(max_runs=3, min_passes=1)
295284
def test_allowed_relationship_types(documents, entity_extraction_model):
296285
# drop collection
297286
client = MongoClient(CONNECTION_STRING)

0 commit comments

Comments
 (0)