Skip to content

Commit b5a1b09

Browse files
refactor: reorganize modules
1 parent bd5b3d2 commit b5a1b09

File tree

10 files changed

+207
-20
lines changed

10 files changed

+207
-20
lines changed

.gitignore

Lines changed: 174 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,178 @@
1-
__pycache__
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject the date and other information into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# UV
98+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
#uv.lock
102+
103+
# poetry
104+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105+
# This is especially recommended for binary packages to ensure reproducibility, and is more
106+
# commonly ignored for libraries.
107+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108+
#poetry.lock
109+
110+
# pdm
111+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112+
#pdm.lock
113+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114+
# in version control.
115+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116+
.pdm.toml
117+
.pdm-python
118+
.pdm-build/
119+
120+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121+
__pypackages__/
122+
123+
# Celery stuff
124+
celerybeat-schedule
125+
celerybeat.pid
126+
127+
# SageMath parsed files
128+
*.sage.py
129+
130+
# Environments
2131
.env
132+
.venv
133+
env/
134+
venv/
135+
ENV/
136+
env.bak/
137+
venv.bak/
138+
139+
# Spyder project settings
140+
.spyderproject
141+
.spyproject
142+
143+
# Rope project settings
144+
.ropeproject
145+
146+
# mkdocs documentation
147+
/site
148+
149+
# mypy
150+
.mypy_cache/
151+
.dmypy.json
152+
dmypy.json
153+
154+
# Pyre type checker
155+
.pyre/
156+
157+
# pytype static type analyzer
158+
.pytype/
159+
160+
# Cython debug symbols
161+
cython_debug/
162+
163+
# PyCharm
164+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166+
# and can be added to the global gitignore or merged into this file. For a more nuclear
167+
# option (not recommended), the following entry ignores the entire .idea folder; it is enabled here.
168+
.idea/
169+
170+
# Ruff stuff:
171+
.ruff_cache/
172+
173+
# PyPI configuration file
174+
.pypirc
175+
3176
cache
4-
*.log
5177
*.pyc
6178
*.html
7-
.idea/
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ input_file: resources/examples/raw_demo.jsonl
33
tokenizer: cl100k_base
44
quiz_samples: 2
55
traverse_strategy:
6-
qa_form: atomic
6+
qa_form: open
77
bidirectional: true
88
difficulty_order:
99
- medium

evaluate.py renamed to graphgen/evaluate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import argparse
66
import pandas as pd
77
from dotenv import load_dotenv
8-
from graphgen.models import LengthEvaluator, MTLDEvaluator, RewardEvaluator, TextPair, UniEvaluator
9-
from graphgen.utils import logger, set_logger
8+
from .models import LengthEvaluator, MTLDEvaluator, RewardEvaluator, TextPair, UniEvaluator
9+
from .utils import logger, set_logger
1010

1111
sys_path = os.path.abspath(os.path.dirname(__file__))
1212
set_logger(os.path.join(sys_path, "cache", "logs", "evaluate.log"))

generate.py renamed to graphgen/generate.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,20 @@
55
import yaml
66
from dotenv import load_dotenv
77

8-
from graphgen.graphgen import GraphGen
9-
from graphgen.models import OpenAIModel, Tokenizer, TraverseStrategy
10-
from graphgen.utils import set_logger
8+
from .graphgen import GraphGen
9+
from .models import OpenAIModel, Tokenizer, TraverseStrategy
10+
from .utils import set_logger
1111

1212
sys_path = os.path.abspath(os.path.dirname(__file__))
13-
unique_id = int(time.time())
14-
set_logger(os.path.join(sys_path, "cache", "logs", f"graphgen_{unique_id}.log"), if_stream=False)
15-
config_path = os.path.join(sys_path, "cache", "data", "graphgen", str(unique_id), f"config-{unique_id}.yaml")
1613

1714
load_dotenv()
1815

19-
def save_config(global_config):
16+
def set_working_dir(folder):
17+
os.makedirs(folder, exist_ok=True)
18+
os.makedirs(os.path.join(folder, "data", "graphgen"), exist_ok=True)
19+
os.makedirs(os.path.join(folder, "logs"), exist_ok=True)
20+
21+
def save_config(config_path, global_config):
2022
if not os.path.exists(os.path.dirname(config_path)):
2123
os.makedirs(os.path.dirname(config_path))
2224
with open(config_path, "w", encoding='utf-8') as config_file:
@@ -28,7 +30,19 @@ def save_config(global_config):
2830
help='Config parameters for GraphGen.',
2931
default='graphgen_config.yaml',
3032
type=str)
33+
parser.add_argument('--output_dir',
34+
help='Output directory for GraphGen.',
35+
default=sys_path,
36+
required=True,
37+
type=str)
38+
3139
args = parser.parse_args()
40+
41+
working_dir = args.output_dir
42+
set_working_dir(working_dir)
43+
unique_id = int(time.time())
44+
set_logger(os.path.join(working_dir, "logs", f"graphgen_{unique_id}.log"), if_stream=False)
45+
3246
with open(args.config_file, "r", encoding='utf-8') as f:
3347
config = yaml.load(f, Loader=yaml.FullLoader)
3448

@@ -59,6 +73,7 @@ def save_config(global_config):
5973
)
6074

6175
graph_gen = GraphGen(
76+
working_dir=working_dir,
6277
unique_id=unique_id,
6378
synthesizer_llm_client=synthesizer_llm_client,
6479
trainee_llm_client=trainee_llm_client,
@@ -77,4 +92,5 @@ def save_config(global_config):
7792

7893
graph_gen.traverse()
7994

80-
save_config(config)
95+
path = os.path.join(working_dir, "data", "graphgen", str(unique_id), f"config-{unique_id}.yaml")
96+
save_config(path, config)

judge.py renamed to graphgen/judge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import asyncio
44
from dotenv import load_dotenv
55

6-
from graphgen.models import NetworkXStorage, JsonKVStorage, OpenAIModel
7-
from graphgen.operators import judge_statement
6+
from .models import NetworkXStorage, JsonKVStorage, OpenAIModel
7+
from .operators import judge_statement
88

99
sys_path = os.path.abspath(os.path.dirname(__file__))
1010

scripts/evaluate.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
python3 evaluate.py --folder cache/data \
1+
python3 -m graphgen.evaluate --folder cache/data \
22
--output cache/output \
33
--reward "OpenAssistant/reward-model-deberta-v3-large-v2,BAAI/IndustryCorpus2_DataRater" \
44
--uni MingZhong/unieval-sum \

scripts/generate.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
python3 generate.py --config_file configs/graphgen_config.yaml
1+
python3 -m graphgen.generate --config_file graphgen/configs/graphgen_config.yaml --output_dir cache/

scripts/judge.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
python3 judge.py --input cache \
1+
python3 -m graphgen.judge --input cache \
22
--output cache/output/new_graph.graphml \

webui/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def run_graphgen(*arguments: list, progress=gr.Progress()):
204204
<img src="https://img.shields.io/github/stars/open-sciencelab/GraphGen?style=social" alt="GitHub Stars">
205205
</a>
206206
<a href="https://arxiv.org/xxxxx">
207-
<img src="https://img.shields.io/badge/arXiv-2401.00001-yellow" alt="arXiv">
207+
<img src="https://img.shields.io/badge/arXiv-xxxxx-yellow" alt="arXiv">
208208
</a>
209209
</div>
210210
""")

0 commit comments

Comments
 (0)