Skip to content

Commit a65e7b3

Browse files
feat: save config as yaml file when generating
1 parent c494093 commit a65e7b3

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

generate.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import time
44
import argparse
5+
import yaml
56
from dotenv import load_dotenv
67

78
from graphgen.graphgen import GraphGen
@@ -75,3 +76,5 @@
7576
graph_gen.judge(re_judge=False)
7677

7778
graph_gen.traverse()
79+
with open(os.path.join(sys_path, "cache", "configs", f"graphgen_{unique_id}.yaml"), "w", encoding='utf-8') as f:
80+
yaml.dump(traverse_strategy.to_yaml(), f)

graphgen/graphgen.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ async def async_split_chunks(self, data: Union[List[list], List[dict]], data_typ
6767
}
6868
_add_doc_keys = await self.full_docs_storage.filter_keys(list(new_docs.keys()))
6969
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
70-
if not len(new_docs):
70+
if len(new_docs) == 0:
7171
logger.warning("All docs are already in the storage")
7272
return {}
7373
logger.info(f"[New Docs] inserting {len(new_docs)} docs")
@@ -86,11 +86,12 @@ async def async_split_chunks(self, data: Union[List[list], List[dict]], data_typ
8686
elif data_type == "chunked":
8787
assert isinstance(data, list) and isinstance(data[0], list)
8888
new_docs = {
89-
compute_content_hash("".join(chunk['content']), prefix="doc-"): {'content': "".join(chunk['content'])} for doc in data for chunk in doc
89+
compute_content_hash("".join(chunk['content']), prefix="doc-"): {'content': "".join(chunk['content'])}
90+
for doc in data for chunk in doc
9091
}
9192
_add_doc_keys = await self.full_docs_storage.filter_keys(list(new_docs.keys()))
9293
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
93-
if not len(new_docs):
94+
if len(new_docs) == 0:
9495
logger.warning("All docs are already in the storage")
9596
return {}
9697
logger.info(f"[New Docs] inserting {len(new_docs)} docs")

models/strategy/travserse_strategy.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,16 @@ class TraverseStrategy(BaseStrategy):
1919
edge_sampling: str = "max_loss" # "max_loss" or "min_loss" or "random"
2020
# 孤立节点的处理策略
2121
isolated_node_strategy: str = "add" # "add" or "ignore"
22+
23+
def to_yaml(self):
    """Return the traversal settings as a plain, YAML-serializable mapping.

    Despite the name, this does not produce YAML text: it builds a nested
    dict that the caller is expected to pass to a YAML dumper.

    Returns:
        dict: A single-key mapping ``{"traverse_strategy": {...}}`` whose
        inner dict holds the current value of each strategy attribute.
    """
    return {
        # Everything is nested under one top-level key so the dumped file
        # has a dedicated "traverse_strategy" section.
        "traverse_strategy": {
            "expand_method": self.expand_method,
            "bidirectional": self.bidirectional,
            "max_extra_edges": self.max_extra_edges,
            "max_tokens": self.max_tokens,
            "max_depth": self.max_depth,
            "edge_sampling": self.edge_sampling,
            "isolated_node_strategy": self.isolated_node_strategy,
        }
    }

0 commit comments

Comments
 (0)