Skip to content

Commit 5f9ad0d

Browse files
Chore/gleanings any encoding (#1569)
* Make claims and entities independent of encoding
* Semver
* Change semver release type
1 parent 2abd6c5 commit 5f9ad0d

File tree

5 files changed

+12
-8
lines changed

5 files changed

+12
-8
lines changed
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "minor",
3+
"description": "Make gleanings independent of encoding"
4+
}

graphrag/index/operations/extract_covariates/claim_extractor.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,8 @@ def __init__(
8888

8989
# Construct the looping arguments
9090
encoding = tiktoken.get_encoding(encoding_model or defs.ENCODING_MODEL)
91-
yes = f"{encoding.encode('YES')[0]}"
92-
no = f"{encoding.encode('NO')[0]}"
91+
yes = f"{encoding.encode('Y')[0]}"
92+
no = f"{encoding.encode('N')[0]}"
9393
self._loop_args = {"logit_bias": {yes: 100, no: 100}, "max_tokens": 1}
9494

9595
async def __call__(
@@ -195,7 +195,7 @@ async def _process_document(
195195
history=response.history,
196196
model_parameters=self._loop_args,
197197
)
198-
if response.output.content != "YES":
198+
if response.output.content != "Y":
199199
break
200200

201201
return self._parse_claim_tuples(results, prompt_args)

graphrag/index/operations/extract_entities/graph_extractor.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,8 @@ def __init__(
9292

9393
# Construct the looping arguments
9494
encoding = tiktoken.get_encoding(encoding_model or defs.ENCODING_MODEL)
95-
yes = f"{encoding.encode('YES')[0]}"
96-
no = f"{encoding.encode('NO')[0]}"
95+
yes = f"{encoding.encode('Y')[0]}"
96+
no = f"{encoding.encode('N')[0]}"
9797
self._loop_args = {"logit_bias": {yes: 100, no: 100}, "max_tokens": 1}
9898

9999
async def __call__(
@@ -180,7 +180,7 @@ async def _process_document(
180180
model_parameters=self._loop_args,
181181
)
182182

183-
if response.output.content != "YES":
183+
if response.output.content != "Y":
184184
break
185185

186186
return results

graphrag/prompts/index/claim_extraction.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,4 +58,4 @@
5858

5959

6060
CONTINUE_PROMPT = "MANY entities were missed in the last extraction. Add them below using the same format:\n"
61-
LOOP_PROMPT = "It appears some entities may have still been missed. Answer YES {tuple_delimiter} NO if there are still entities that need to be added.\n"
61+
LOOP_PROMPT = "It appears some entities may have still been missed. Answer Y or N if there are still entities that need to be added.\n"

graphrag/prompts/index/entity_extraction.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,4 +126,4 @@
126126
Output:"""
127127

128128
CONTINUE_PROMPT = "MANY entities and relationships were missed in the last extraction. Remember to ONLY emit entities that match any of the previously extracted types. Add them below using the same format:\n"
129-
LOOP_PROMPT = "It appears some entities and relationships may have still been missed. Answer YES | NO if there are still entities or relationships that need to be added.\n"
129+
LOOP_PROMPT = "It appears some entities and relationships may have still been missed. Answer Y or N if there are still entities or relationships that need to be added.\n"

0 commit comments

Comments
 (0)