
Commit ea1603b

fix: fix lint problem
1 parent a6aedaf commit ea1603b

File tree: 9 files changed, +22 / -27 lines


graphgen/operators/__init__.py

Lines changed: 5 additions & 5 deletions
@@ -1,21 +1,21 @@
 from .build_kg import BuildKGService
 from .chunk import ChunkService
-from .extract import extract_info
+from .extract import extract
 from .generate import generate_qas
-from .partition import partition_kg
+from .judge import JudgeService
+from .partition import PartitionService
 from .quiz import QuizService
 from .read import read
 from .search import search_all
-from .judge import JudgeService

 operators = {
     "read": read,
     "chunk": ChunkService,
     "build_kg": BuildKGService,
     "quiz": QuizService,
     "judge": JudgeService,
-    "extract_info": extract_info,
+    "extract_info": extract,
     "search_all": search_all,
-    "partition_kg": partition_kg,
+    "partition": PartitionService,
     "generate_qas": generate_qas,
 }
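
A note on the registry shape after this change: the values mix plain functions (read, extract, search_all, generate_qas) with service classes (ChunkService, PartitionService, and so on), so any dispatcher has to instantiate the classes before invoking them. A minimal sketch of that dispatch pattern; run_operator and the zero-argument construction are illustrative assumptions, not code from the repo:

import inspect

from graphgen.operators import operators

def run_operator(name: str, *args, **kwargs):
    # Hypothetical dispatcher: classes are instantiated first, functions
    # are called directly. Constructor arguments are elided here; the real
    # services take configuration such as working_dir.
    op = operators[name]
    if inspect.isclass(op):
        # Service classes in this commit define __call__, so an instance
        # is itself callable on a batch.
        return op()(*args, **kwargs)
    return op(*args, **kwargs)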

graphgen/operators/chunk/chunk_service.py

Lines changed: 1 addition & 3 deletions
@@ -94,8 +94,6 @@ def chunk_documents(self, new_docs: list) -> list:
                     **doc,
                 }
             )
-        self.chunk_storage.upsert(
-            {chunk["_chunk_id"]: chunk for chunk in chunks}
-        )
+        self.chunk_storage.upsert({chunk["_chunk_id"]: chunk for chunk in chunks})
         self.chunk_storage.index_done_callback()
         return chunks
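
The collapsed one-liner keys the upsert payload by _chunk_id, which makes re-running the chunk step overwrite rather than duplicate entries. An illustrative stand-in for the KV contract this relies on; the real json_kv backend is not part of this commit:

class DictKVStorage:
    """Hypothetical in-memory sketch of the BaseKVStorage interface."""

    def __init__(self) -> None:
        self._data: dict[str, dict] = {}

    def upsert(self, records: dict[str, dict]) -> None:
        # Keyed by id, so repeated runs replace existing entries.
        self._data.update(records)

    def get_by_id(self, key: str) -> dict | None:
        return self._data.get(key)

    def index_done_callback(self) -> None:
        # A persistent backend would flush to disk here.
        pass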

graphgen/operators/extract/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-from .extract import extract_info
+from .extract import extract

graphgen/operators/extract/extract.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 from graphgen.utils import logger, run_concurrent


-async def extract_info(
+async def extract(
     llm_client: BaseLLMWrapper,
     chunk_storage: BaseKVStorage,
     extract_config: dict,
graphgen/operators/generate/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-from .generate_qas import generate_qas
+from .generate import generate_qas

graphgen/operators/generate/generate.py

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ async def generate_qas(
         batches,
         desc="[4/4]Generating QAs",
         unit="batch",
-        progress_bar=progress_bar,
     )

     # format

graphgen/operators/judge/judge_service.py

Lines changed: 4 additions & 6 deletions
@@ -59,13 +59,11 @@ def judge(self, items: list[dict]) -> None:
5959
if isinstance(index, str):
6060
node_id = index
6161
node_data = self.graph_storage.get_node(node_id)
62-
if node_data:
63-
node_data["loss"] = loss
64-
self.graph_storage.update_node(node_id, node_data)
62+
node_data["loss"] = loss
63+
self.graph_storage.update_node(node_id, node_data)
6564
elif isinstance(index, tuple):
6665
edge_source, edge_target = index
6766
edge_data = self.graph_storage.get_edge(edge_source, edge_target)
68-
if edge_data:
69-
edge_data["loss"] = loss
70-
self.graph_storage.update_edge(edge_source, edge_target, edge_data)
67+
edge_data["loss"] = loss
68+
self.graph_storage.update_edge(edge_source, edge_target, edge_data)
7169
self.graph_storage.index_done_callback()
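
Dropping the `if node_data:` / `if edge_data:` guards means the judge step now assumes get_node and get_edge always return a dict for every index it is handed. With the networkx backend named elsewhere in this commit, that holds whenever the indices come from the same graph; a hypothetical sketch of the storage contract being assumed, not the project's code:

import networkx as nx

class NXGraphStorage:
    """Illustrative shape of the graph storage the judge hunk relies on."""

    def __init__(self) -> None:
        self._g = nx.Graph()

    def get_node(self, node_id: str) -> dict:
        # networkx returns the node's attribute dict; a missing node raises
        # KeyError rather than returning None, so a truthiness guard never
        # fires for indices taken from the graph itself.
        return self._g.nodes[node_id]

    def update_node(self, node_id: str, data: dict) -> None:
        self._g.nodes[node_id].update(data)

    def get_edge(self, u: str, v: str) -> dict:
        return self._g.edges[u, v]

    def update_edge(self, u: str, v: str, data: dict) -> None:
        self._g.edges[u, v].update(data)

    def index_done_callback(self) -> None:
        pass  # persistence hook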

graphgen/operators/partition/partition_service.py

Lines changed: 3 additions & 3 deletions
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Iterable
+from typing import Iterable

 import pandas as pd

@@ -101,7 +101,7 @@ def _pre_tokenize(self) -> None:
                 node_data["length"] = len(tokens)
                 self.kg_instance.update_node(node_id, node_data)
             except Exception as e:
-                logger.warning(f"Failed to tokenize node {node_id}: {e}")
+                logger.warning("Failed to tokenize node %s: %s", node_id, e)
                 node_data["length"] = 0

         # Process edges
@@ -113,7 +113,7 @@ def _pre_tokenize(self) -> None:
                 edge_data["length"] = len(tokens)
                 self.kg_instance.update_edge(u, v, edge_data)
             except Exception as e:
-                logger.warning(f"Failed to tokenize edge {u}-{v}: {e}")
+                logger.warning("Failed to tokenize edge %s-%s: %s", u, v, e)
                 edge_data["length"] = 0

         # Persist changes
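
Both hunks are the standard fix for pylint's logging-fstring-interpolation warning (W1203): an f-string is formatted eagerly even when the log level discards the record, while %-style arguments are only interpolated if the record is actually emitted. A self-contained illustration:

import logging

logger = logging.getLogger("demo")
logger.setLevel(logging.ERROR)  # WARNING records are filtered out

node_id = "n1"

# Eager: the f-string is built even though the record is discarded.
logger.warning(f"Failed to tokenize node {node_id}")

# Lazy: interpolation is deferred until the record is known to be
# emitted, so the formatting cost is skipped entirely here.
logger.warning("Failed to tokenize node %s", node_id)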

graphgen/operators/quiz/quiz_service.py

Lines changed: 6 additions & 6 deletions
@@ -5,7 +5,7 @@
 from graphgen.bases import BaseGraphStorage, BaseKVStorage, BaseLLMWrapper
 from graphgen.common import init_llm, init_storage
 from graphgen.models import QuizGenerator
-from graphgen.utils import compute_content_hash, logger, run_concurrent
+from graphgen.utils import compute_dict_hash, logger, run_concurrent


 class QuizService:
@@ -20,7 +20,7 @@ def __init__(
         self.graph_storage: BaseGraphStorage = init_storage(
             backend="networkx", working_dir=working_dir, namespace="graph"
         )
-        # { _description_id: { "description": str, "quizzes": List[Tuple[str, str]] } }
+        # { _quiz_id: { "description": str, "quizzes": List[Tuple[str, str]] } }
         self.quiz_storage: BaseKVStorage = init_storage(
             backend="json_kv", working_dir=working_dir, namespace="quiz"
         )
@@ -37,8 +37,8 @@ def __call__(self, batch: pd.DataFrame) -> Iterable[pd.DataFrame]:
     async def _process_single_quiz(self, item: tuple) -> dict | None:
         # if quiz in quiz_storage exists already, directly get it
         index, desc = item
-        _description_id = compute_content_hash(desc, prefix="quiz-")
-        if self.quiz_storage.get_by_id(_description_id):
+        _quiz_id = compute_dict_hash({"index": index, "description": desc})
+        if self.quiz_storage.get_by_id(_quiz_id):
             return None

         tasks = []
@@ -56,7 +56,7 @@ async def _process_single_quiz(self, item: tuple) -> dict | None:
             rephrased_text = self.generator.parse_rephrased_text(new_description)
             quizzes.append((rephrased_text, gt))
         return {
-            "_description_id": _description_id,
+            "_quiz_id": _quiz_id,
             "description": desc,
             "index": index,
             "quizzes": quizzes,
@@ -100,7 +100,7 @@ def quiz(self) -> Iterable[pd.DataFrame]:
             if new_result:
                 self.quiz_storage.upsert(
                     {
-                        new_result["_description_id"]: {
+                        new_result["_quiz_id"]: {
                             "description": new_result["description"],
                             "quizzes": new_result["quizzes"],
                         }
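
The switch from compute_content_hash(desc, prefix="quiz-") to compute_dict_hash({"index": index, "description": desc}) keys the quiz cache on both the graph index and the description, so two indices that share one description no longer collide in storage. The actual implementation of compute_dict_hash is not part of this commit; a plausible sketch under that caveat:

import hashlib
import json

def compute_dict_hash(d: dict, prefix: str = "") -> str:
    # Hypothetical stand-in for graphgen.utils.compute_dict_hash.
    # sort_keys makes the digest independent of key insertion order.
    payload = json.dumps(d, sort_keys=True, ensure_ascii=False)
    return prefix + hashlib.md5(payload.encode("utf-8")).hexdigest()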
