Skip to content

Commit 0e791e4

Browse files
committed
Remove indexing embedding and input configs
1 parent fb38b41 commit 0e791e4

File tree

13 files changed

+31 / -175 lines changed (31 additions, 175 deletions)

13 files changed

+31 / -175 lines changed (31 additions, 175 deletions)

graphrag/api/query.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
import pandas as pd
2525
from pydantic import validate_call
2626

27-
from graphrag.config.models.graph_rag_config import GraphRagConfig
28-
from graphrag.index.config.embeddings import (
27+
from graphrag.config.embeddings import (
2928
community_full_content_embedding,
3029
entity_description_embedding,
3130
text_unit_text_embedding,
3231
)
32+
from graphrag.config.models.graph_rag_config import GraphRagConfig
3333
from graphrag.logger.print_progress import PrintProgressLogger
3434
from graphrag.query.factory import (
3535
get_basic_search_engine,

graphrag/index/config/__init__.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

graphrag/index/config/input.py

Lines changed: 0 additions & 110 deletions
This file was deleted.

graphrag/index/errors.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

graphrag/index/flows/generate_text_embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from graphrag.cache.pipeline_cache import PipelineCache
1111
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
12-
from graphrag.index.config.embeddings import (
12+
from graphrag.config.embeddings import (
1313
community_full_content_embedding,
1414
community_summary_embedding,
1515
community_title_embedding,

graphrag/index/input/csv.py

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
import logging
77
import re
88
from io import BytesIO
9-
from typing import cast
109

1110
import pandas as pd
1211

13-
from graphrag.index.config.input import PipelineCSVInputConfig, PipelineInputConfig
12+
from graphrag.config.models.input_config import InputConfig
1413
from graphrag.index.utils.hashing import gen_sha512_hash
1514
from graphrag.logger.base import ProgressLogger
1615
from graphrag.storage.pipeline_storage import PipelineStorage
@@ -23,13 +22,12 @@
2322

2423

2524
async def load(
26-
config: PipelineInputConfig,
25+
config: InputConfig,
2726
progress: ProgressLogger | None,
2827
storage: PipelineStorage,
2928
) -> pd.DataFrame:
3029
"""Load csv inputs from a directory."""
31-
csv_config = cast("PipelineCSVInputConfig", config)
32-
log.info("Loading csv files from %s", csv_config.base_dir)
30+
log.info("Loading csv files from %s", config.base_dir)
3331

3432
async def load_file(path: str, group: dict | None) -> pd.DataFrame:
3533
if group is None:
@@ -43,51 +41,49 @@ async def load_file(path: str, group: dict | None) -> pd.DataFrame:
4341
)
4442
if "id" not in data.columns:
4543
data["id"] = data.apply(lambda x: gen_sha512_hash(x, x.keys()), axis=1)
46-
if csv_config.source_column is not None and "source" not in data.columns:
47-
if csv_config.source_column not in data.columns:
44+
if config.source_column is not None and "source" not in data.columns:
45+
if config.source_column not in data.columns:
4846
log.warning(
4947
"source_column %s not found in csv file %s",
50-
csv_config.source_column,
48+
config.source_column,
5149
path,
5250
)
5351
else:
54-
data["source"] = data.apply(
55-
lambda x: x[csv_config.source_column], axis=1
56-
)
57-
if csv_config.text_column is not None and "text" not in data.columns:
58-
if csv_config.text_column not in data.columns:
52+
data["source"] = data.apply(lambda x: x[config.source_column], axis=1)
53+
if config.text_column is not None and "text" not in data.columns:
54+
if config.text_column not in data.columns:
5955
log.warning(
6056
"text_column %s not found in csv file %s",
61-
csv_config.text_column,
57+
config.text_column,
6258
path,
6359
)
6460
else:
65-
data["text"] = data.apply(lambda x: x[csv_config.text_column], axis=1)
66-
if csv_config.title_column is not None and "title" not in data.columns:
67-
if csv_config.title_column not in data.columns:
61+
data["text"] = data.apply(lambda x: x[config.text_column], axis=1)
62+
if config.title_column is not None and "title" not in data.columns:
63+
if config.title_column not in data.columns:
6864
log.warning(
6965
"title_column %s not found in csv file %s",
70-
csv_config.title_column,
66+
config.title_column,
7167
path,
7268
)
7369
else:
74-
data["title"] = data.apply(lambda x: x[csv_config.title_column], axis=1)
70+
data["title"] = data.apply(lambda x: x[config.title_column], axis=1)
7571

76-
if csv_config.timestamp_column is not None:
77-
fmt = csv_config.timestamp_format
72+
if config.timestamp_column is not None:
73+
fmt = config.timestamp_format
7874
if fmt is None:
7975
msg = "Must specify timestamp_format if timestamp_column is specified"
8076
raise ValueError(msg)
8177

82-
if csv_config.timestamp_column not in data.columns:
78+
if config.timestamp_column not in data.columns:
8379
log.warning(
8480
"timestamp_column %s not found in csv file %s",
85-
csv_config.timestamp_column,
81+
config.timestamp_column,
8682
path,
8783
)
8884
else:
8985
data["timestamp"] = pd.to_datetime(
90-
data[csv_config.timestamp_column], format=fmt
86+
data[config.timestamp_column], format=fmt
9187
)
9288

9389
# TODO: There's probably a less gross way to do this

graphrag/index/input/factory.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
from graphrag.config.enums import InputType
1414
from graphrag.config.models.input_config import InputConfig
15-
from graphrag.index.config.input import PipelineInputConfig
1615
from graphrag.index.input.csv import input_type as csv
1716
from graphrag.index.input.csv import load as load_csv
1817
from graphrag.index.input.text import input_type as text
@@ -30,7 +29,7 @@
3029

3130

3231
async def create_input(
33-
config: PipelineInputConfig | InputConfig,
32+
config: InputConfig,
3433
progress_reporter: ProgressLogger | None = None,
3534
root_dir: str | None = None,
3635
) -> pd.DataFrame:

graphrag/index/input/text.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import pandas as pd
1212

13-
from graphrag.index.config.input import PipelineInputConfig
13+
from graphrag.config.models.input_config import InputConfig
1414
from graphrag.index.utils.hashing import gen_sha512_hash
1515
from graphrag.logger.base import ProgressLogger
1616
from graphrag.storage.pipeline_storage import PipelineStorage
@@ -23,7 +23,7 @@
2323

2424

2525
async def load(
26-
config: PipelineInputConfig,
26+
config: InputConfig,
2727
progress: ProgressLogger | None,
2828
storage: PipelineStorage,
2929
) -> pd.DataFrame:

graphrag/index/update/incremental_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
from graphrag.cache.pipeline_cache import PipelineCache
1212
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
13+
from graphrag.config.embeddings import get_embedded_fields, get_embedding_settings
1314
from graphrag.config.models.graph_rag_config import GraphRagConfig
14-
from graphrag.index.config.embeddings import get_embedded_fields, get_embedding_settings
1515
from graphrag.index.flows.generate_text_embeddings import generate_text_embeddings
1616
from graphrag.index.update.communities import (
1717
_merge_and_resolve_nodes,

0 commit comments

Comments
 (0)