Skip to content

Commit d9e86d3

Browse files
committed
Update tests
1 parent 9c73259 commit d9e86d3

10 files changed

+25
-8
lines changed
1009 Bytes
Binary file not shown.
1.9 KB
Binary file not shown.
-13 Bytes
Binary file not shown.

tests/verbs/data/documents.parquet

61 Bytes
Binary file not shown.

tests/verbs/data/entities.parquet

15 Bytes
Binary file not shown.
-29 Bytes
Binary file not shown.
-10 Bytes
Binary file not shown.

tests/verbs/test_create_base_text_units.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
compare_outputs,
1212
create_test_context,
1313
load_test_table,
14+
update_document_metadata,
1415
)
1516

1617

@@ -43,6 +44,8 @@ async def test_create_base_text_units_metadata():
4344
config.input.metadata = ["title"]
4445
config.chunks.prepend_metadata = True
4546

47+
await update_document_metadata(config.input.metadata, context)
48+
4649
await run_workflow(
4750
config,
4851
context,
@@ -65,6 +68,8 @@ async def test_create_base_text_units_metadata_included_in_chunk():
6568
config.chunks.prepend_metadata = True
6669
config.chunks.chunk_size_includes_metadata = True
6770

71+
await update_document_metadata(config.input.metadata, context)
72+
6873
await run_workflow(
6974
config,
7075
context,

tests/verbs/test_create_final_documents.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
compare_outputs,
1414
create_test_context,
1515
load_test_table,
16+
update_document_metadata,
1617
)
1718

1819

@@ -37,15 +38,18 @@ async def test_create_final_documents():
3738

3839

3940
async def test_create_final_documents_with_metadata_column():
40-
expected = load_test_table("documents")
41-
4241
context = await create_test_context(
4342
storage=["text_units"],
4443
)
4544

4645
config = create_graphrag_config({"models": DEFAULT_MODEL_CONFIG})
4746
config.input.metadata = ["title"]
4847

48+
# simulate the metadata construction during initial input loading
49+
await update_document_metadata(config.input.metadata, context)
50+
51+
expected = await load_table_from_storage("documents", context.storage)
52+
4953
await run_workflow(
5054
config,
5155
context,
@@ -54,12 +58,12 @@ async def test_create_final_documents_with_metadata_column():
5458

5559
actual = await load_table_from_storage("documents", context.storage)
5660

57-
# we should have dropped "title" and added "attributes"
58-
# our test dataframe does not have attributes, so we'll assert without it
61+
# text_unit_ids is not part of the column comparison below, so we'll assert without it
5962
# and separately confirm it is in the output
6063
compare_outputs(
61-
actual, expected, columns=["id", "human_readable_id", "text", "text_unit_ids"]
64+
actual, expected, columns=["id", "human_readable_id", "text", "metadata"]
6265
)
63-
assert len(actual.columns) == 6
66+
assert len(actual.columns) == 7
6467
assert "title" in actual.columns
68+
assert "text_unit_ids" in actual.columns
6569
assert "metadata" in actual.columns

tests/verbs/util.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import graphrag.config.defaults as defs
88
from graphrag.index.context import PipelineRunContext
99
from graphrag.index.run.utils import create_run_context
10-
from graphrag.utils.storage import write_table_to_storage
10+
from graphrag.utils.storage import load_table_from_storage, write_table_to_storage
1111

1212
pd.set_option("display.max_columns", None)
1313

@@ -43,7 +43,6 @@ async def create_test_context(storage: list[str] | None = None) -> PipelineRunCo
4343
if storage:
4444
for name in storage:
4545
table = load_test_table(name)
46-
# normal storage interface insists on bytes
4746
await write_table_to_storage(table, name, context.storage)
4847

4948
return context
@@ -83,3 +82,12 @@ def compare_outputs(
8382
print("Actual:")
8483
print(actual[column])
8584
raise
85+
86+
87+
async def update_document_metadata(metadata: list[str], context: PipelineRunContext):
88+
"""Takes the default documents and adds the configured metadata columns for later parsing by the text units and final documents workflows."""
89+
documents = await load_table_from_storage("documents", context.storage)
90+
documents["metadata"] = documents[metadata].apply(lambda row: row.to_dict(), axis=1)
91+
await write_table_to_storage(
92+
documents, "documents", context.storage
93+
) # write to the runtime context storage only

0 commit comments

Comments
 (0)