Skip to content

Commit 7186cfe

Browse files
Merge pull request #56 from open-sciencelab/e2e-tests
tests: add e2e tests
2 parents 6d22862 + f3a8787 commit 7186cfe

File tree

7 files changed

+215
-10
lines changed

7 files changed

+215
-10
lines changed

graphgen/generate.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616

1717
def set_working_dir(folder):
1818
os.makedirs(folder, exist_ok=True)
19-
os.makedirs(os.path.join(folder, "data", "graphgen"), exist_ok=True)
20-
os.makedirs(os.path.join(folder, "logs"), exist_ok=True)
2119

2220

2321
def save_config(config_path, global_config):
@@ -48,17 +46,20 @@ def main():
4846
args = parser.parse_args()
4947

5048
working_dir = args.output_dir
51-
set_working_dir(working_dir)
5249

5350
with open(args.config_file, "r", encoding="utf-8") as f:
5451
config = yaml.load(f, Loader=yaml.FullLoader)
5552

5653
output_data_type = config["output_data_type"]
5754
unique_id = int(time.time())
55+
56+
output_path = os.path.join(
57+
working_dir, "data", "graphgen", f"{unique_id}_{output_data_type}"
58+
)
59+
set_working_dir(output_path)
60+
5861
set_logger(
59-
os.path.join(
60-
working_dir, "logs", f"graphgen_{output_data_type}_{unique_id}.log"
61-
),
62+
os.path.join(output_path, f"{unique_id}.log"),
6263
if_stream=True,
6364
)
6465
logger.info(
@@ -94,8 +95,7 @@ def main():
9495
else:
9596
raise ValueError(f"Unsupported output data type: {output_data_type}")
9697

97-
output_path = os.path.join(working_dir, "data", "graphgen", str(unique_id))
98-
save_config(os.path.join(output_path, f"config-{unique_id}.yaml"), config)
98+
save_config(os.path.join(output_path, "config.yaml"), config)
9999
logger.info("GraphGen completed successfully. Data saved to %s", output_path)
100100

101101

graphgen/graphgen.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,13 @@ def __post_init__(self):
102102
self.working_dir, namespace="rephrase"
103103
)
104104
self.qa_storage: JsonListStorage = JsonListStorage(
105-
os.path.join(self.working_dir, "data", "graphgen", str(self.unique_id)),
106-
namespace=f"qa-{self.unique_id}",
105+
os.path.join(
106+
self.working_dir,
107+
"data",
108+
"graphgen",
109+
f"{self.unique_id}_{self.config['output_data_type']}",
110+
),
111+
namespace="qa",
107112
)
108113

109114
async def async_split_chunks(self, data: List[Union[List, Dict]]) -> dict:

tests/e2e_tests/__init__.py

Whitespace-only changes.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import json
2+
import os
3+
import subprocess
4+
from pathlib import Path
5+
6+
7+
def test_generate_aggregated(tmp_path: Path):
    """Run ``graphgen.generate`` end to end with ``aggregated_config.yaml``.

    The generator is launched as a subprocess that writes into a per-test
    temporary directory. The test then checks that the run folder whose name
    sorts last under ``<output_dir>/data/graphgen`` contains ``config.yaml``,
    at least one ``*.log`` file, and a JSON file holding a non-empty list.
    """
    # Local import: only needed to locate the interpreter running this test.
    import sys

    repo_root = Path(__file__).resolve().parents[2]
    config_path = repo_root / "graphgen" / "configs" / "aggregated_config.yaml"
    output_dir = tmp_path / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
        [
            # Use the same interpreter (and virtualenv) as the test session
            # instead of whatever "python" happens to be first on PATH.
            sys.executable,
            "-m",
            "graphgen.generate",
            "--config_file",
            str(config_path),
            "--output_dir",
            str(output_dir),
        ],
        # Run the child from the repository root. Scoping this to the
        # subprocess avoids os.chdir, which would change the working
        # directory of the whole pytest process for every later test.
        cwd=os.fspath(repo_root),
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.returncode == 0, f"Script failed with error: {result.stderr}"

    data_root = output_dir / "data" / "graphgen"
    assert data_root.exists(), f"{data_root} does not exist"
    # Only directories can be run folders; ignore any stray files.
    run_folders = [entry for entry in data_root.iterdir() if entry.is_dir()]
    assert run_folders, f"No run folders found in {data_root}"
    # Same pick as sorting by name in descending order and taking the first.
    run_folder = max(run_folders, key=lambda p: p.name)

    config_saved = run_folder / "config.yaml"
    assert config_saved.exists(), f"{config_saved} not found"

    # glob() order is unspecified; sort so the inspected file is deterministic.
    json_files = sorted(run_folder.glob("*.json"))
    assert json_files, f"No JSON output found in {run_folder}"

    log_files = list(run_folder.glob("*.log"))
    assert log_files, "No log file generated"

    with open(json_files[0], "r", encoding="utf-8") as f:
        data = json.load(f)
    assert (
        isinstance(data, list) and len(data) > 0
    ), "JSON output is empty or not a list"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import json
2+
import os
3+
import subprocess
4+
from pathlib import Path
5+
6+
7+
def test_generate_atomic(tmp_path: Path):
    """Run ``graphgen.generate`` end to end with ``atomic_config.yaml``.

    The generator is launched as a subprocess that writes into a per-test
    temporary directory. The test then checks that the run folder whose name
    sorts last under ``<output_dir>/data/graphgen`` contains ``config.yaml``,
    at least one ``*.log`` file, and a JSON file holding a non-empty list.
    """
    # Local import: only needed to locate the interpreter running this test.
    import sys

    repo_root = Path(__file__).resolve().parents[2]
    config_path = repo_root / "graphgen" / "configs" / "atomic_config.yaml"
    output_dir = tmp_path / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
        [
            # Use the same interpreter (and virtualenv) as the test session
            # instead of whatever "python" happens to be first on PATH.
            sys.executable,
            "-m",
            "graphgen.generate",
            "--config_file",
            str(config_path),
            "--output_dir",
            str(output_dir),
        ],
        # Run the child from the repository root. Scoping this to the
        # subprocess avoids os.chdir, which would change the working
        # directory of the whole pytest process for every later test.
        cwd=os.fspath(repo_root),
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.returncode == 0, f"Script failed with error: {result.stderr}"

    data_root = output_dir / "data" / "graphgen"
    assert data_root.exists(), f"{data_root} does not exist"
    # Only directories can be run folders; ignore any stray files.
    run_folders = [entry for entry in data_root.iterdir() if entry.is_dir()]
    assert run_folders, f"No run folders found in {data_root}"
    # Same pick as sorting by name in descending order and taking the first.
    run_folder = max(run_folders, key=lambda p: p.name)

    config_saved = run_folder / "config.yaml"
    assert config_saved.exists(), f"{config_saved} not found"

    # glob() order is unspecified; sort so the inspected file is deterministic.
    json_files = sorted(run_folder.glob("*.json"))
    assert json_files, f"No JSON output found in {run_folder}"

    log_files = list(run_folder.glob("*.log"))
    assert log_files, "No log file generated"

    with open(json_files[0], "r", encoding="utf-8") as f:
        data = json.load(f)
    assert (
        isinstance(data, list) and len(data) > 0
    ), "JSON output is empty or not a list"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import json
2+
import os
3+
import subprocess
4+
from pathlib import Path
5+
6+
7+
def test_generate_aggregated(tmp_path: Path):
    """Run ``graphgen.generate`` end to end with ``cot_config.yaml``.

    The generator is launched as a subprocess that writes into a per-test
    temporary directory. The test then checks that the run folder whose name
    sorts last under ``<output_dir>/data/graphgen`` contains ``config.yaml``,
    at least one ``*.log`` file, and a JSON file holding a non-empty list.

    NOTE(review): the function name says "aggregated" but this test drives
    the CoT config; it looks like a copy-paste leftover. Consider renaming
    it to ``test_generate_cot`` once nothing selects the test by name.
    """
    # Local import: only needed to locate the interpreter running this test.
    import sys

    repo_root = Path(__file__).resolve().parents[2]
    config_path = repo_root / "graphgen" / "configs" / "cot_config.yaml"
    output_dir = tmp_path / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
        [
            # Use the same interpreter (and virtualenv) as the test session
            # instead of whatever "python" happens to be first on PATH.
            sys.executable,
            "-m",
            "graphgen.generate",
            "--config_file",
            str(config_path),
            "--output_dir",
            str(output_dir),
        ],
        # Run the child from the repository root. Scoping this to the
        # subprocess avoids os.chdir, which would change the working
        # directory of the whole pytest process for every later test.
        cwd=os.fspath(repo_root),
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.returncode == 0, f"Script failed with error: {result.stderr}"

    data_root = output_dir / "data" / "graphgen"
    assert data_root.exists(), f"{data_root} does not exist"
    # Only directories can be run folders; ignore any stray files.
    run_folders = [entry for entry in data_root.iterdir() if entry.is_dir()]
    assert run_folders, f"No run folders found in {data_root}"
    # Same pick as sorting by name in descending order and taking the first.
    run_folder = max(run_folders, key=lambda p: p.name)

    config_saved = run_folder / "config.yaml"
    assert config_saved.exists(), f"{config_saved} not found"

    # glob() order is unspecified; sort so the inspected file is deterministic.
    json_files = sorted(run_folder.glob("*.json"))
    assert json_files, f"No JSON output found in {run_folder}"

    log_files = list(run_folder.glob("*.log"))
    assert log_files, "No log file generated"

    with open(json_files[0], "r", encoding="utf-8") as f:
        data = json.load(f)
    assert (
        isinstance(data, list) and len(data) > 0
    ), "JSON output is empty or not a list"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import json
2+
import os
3+
import subprocess
4+
from pathlib import Path
5+
6+
7+
def test_generate_aggregated(tmp_path: Path):
    """Run ``graphgen.generate`` end to end with ``multi_hop_config.yaml``.

    The generator is launched as a subprocess that writes into a per-test
    temporary directory. The test then checks that the run folder whose name
    sorts last under ``<output_dir>/data/graphgen`` contains ``config.yaml``,
    at least one ``*.log`` file, and a JSON file holding a non-empty list.

    NOTE(review): the function name says "aggregated" but this test drives
    the multi-hop config; it looks like a copy-paste leftover. Consider
    renaming it to ``test_generate_multi_hop`` once nothing selects the test
    by name.
    """
    # Local import: only needed to locate the interpreter running this test.
    import sys

    repo_root = Path(__file__).resolve().parents[2]
    config_path = repo_root / "graphgen" / "configs" / "multi_hop_config.yaml"
    output_dir = tmp_path / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
        [
            # Use the same interpreter (and virtualenv) as the test session
            # instead of whatever "python" happens to be first on PATH.
            sys.executable,
            "-m",
            "graphgen.generate",
            "--config_file",
            str(config_path),
            "--output_dir",
            str(output_dir),
        ],
        # Run the child from the repository root. Scoping this to the
        # subprocess avoids os.chdir, which would change the working
        # directory of the whole pytest process for every later test.
        cwd=os.fspath(repo_root),
        capture_output=True,
        text=True,
        check=False,
    )
    assert result.returncode == 0, f"Script failed with error: {result.stderr}"

    data_root = output_dir / "data" / "graphgen"
    # is_dir() is already False for a missing path, so one check covers both.
    assert data_root.is_dir(), f"{data_root} does not exist or is not a directory"
    # Only directories can be run folders; ignore any stray files.
    run_folders = [entry for entry in data_root.iterdir() if entry.is_dir()]
    assert run_folders, f"No run folders found in {data_root}"
    # Same pick as sorting by name in descending order and taking the first.
    run_folder = max(run_folders, key=lambda p: p.name)

    config_saved = run_folder / "config.yaml"
    assert config_saved.exists(), f"{config_saved} not found"

    # glob() order is unspecified; sort so the inspected file is deterministic.
    json_files = sorted(run_folder.glob("*.json"))
    assert json_files, f"No JSON output found in {run_folder}"

    log_files = list(run_folder.glob("*.log"))
    assert log_files, "No log file generated"

    with open(json_files[0], "r", encoding="utf-8") as f:
        data = json.load(f)
    assert (
        isinstance(data, list) and len(data) > 0
    ), "JSON output is empty or not a list"

0 commit comments

Comments
 (0)