-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproblem.py
More file actions
146 lines (120 loc) · 5.47 KB
/
problem.py
File metadata and controls
146 lines (120 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import base64
import re
from pydantic import BaseModel, Field
import logging
from pathlib import Path
from typing import Optional, List
def _encode_image(image_path):
with open(image_path, "rb") as image_file:
img = base64.b64encode(image_file.read()).decode("utf-8")
return f"data:image/jpeg;base64,{img}"
def _find_used_images(description_text: str, folder_path: Path) -> list[Path]:
all_images = list(folder_path.glob('*.jpg'))
photo_ids = set(re.findall(r'{{PHOTO_ID:(\d+)', description_text))
markdown_images = set(re.findall(r'!\[.*?\]\((.*?\.jpg)\)', description_text))
used_images = [
img for img in all_images
if img.stem in photo_ids or img.name in markdown_images
]
return used_images
def _replace_img_links(description_text: str, image_paths: list[Path]) -> str:
for image_path in image_paths:
image_id = image_path.stem
old_ref = f"{{{{PHOTO_ID:{image_id}|WIDTH:600}}}}"
new_ref = f""
description_text = description_text.replace(old_ref, new_ref)
return description_text
class Problem(BaseModel):
    """A problem stored on disk: statement text, data-file paths and images.

    After validation the statement is scanned for image references; the
    referenced ``.jpg`` files in ``folder_path`` are base64-encoded into
    ``images`` and their ``{{PHOTO_ID:...}}`` placeholders are rewritten.
    """

    folder_path: Path = Field(..., description="The path to the problem directory")
    name: str = Field(..., description="The name of the problem")
    problem_description: str = Field(..., description="The description of the problem")
    sample_input_path: Path = Field(..., description="The path to the sample input of the problem")
    sample_output_path: Path = Field(..., description="The path to the sample output of the problem")
    input_path: Path = Field(..., description="The path to the input file")
    output_path: Path = Field(..., description="The path to the output file")
    code: Optional[str] = None
    images: list[str] = Field(default_factory=list)

    def model_post_init(self, _context) -> None:
        """Process the description and images once the model is validated.

        This is the pydantic v2 post-initialisation hook. ``__post_init__``
        is a dataclass hook that ``BaseModel`` never calls, so without this
        method the image processing would silently never run.
        """
        self._process_description_and_images()

    def __post_init__(self):
        """Run the image processing explicitly.

        Kept for callers that invoke it by hand; pydantic does not call it.
        Calling it re-scans the current (possibly already rewritten)
        description.
        """
        self._process_description_and_images()

    def _process_description_and_images(self):
        """Collect referenced images, rewrite placeholders, encode the images."""
        used_images = _find_used_images(self.problem_description, self.folder_path)
        self.problem_description = _replace_img_links(self.problem_description, used_images)
        self.images = [_encode_image(str(image_path)) for image_path in used_images]

    def get_sample_input(self) -> str:
        """Return the text content of the sample input file."""
        return self.sample_input_path.read_text()

    def get_sample_output(self) -> str:
        """Return the text content of the sample output file."""
        return self.sample_output_path.read_text()

    def get_input(self) -> str:
        """Return the text content of the full input file."""
        return self.input_path.read_text()

    def get_output(self) -> str:
        """Return the text content of the full output file."""
        return self.output_path.read_text()

    @classmethod
    def from_name(cls, name: str, folder_path: Path):
        """Build a problem from the standard file layout inside *folder_path*.

        Expected files: ``statement.txt``, ``sample_in.txt``,
        ``sample_out.txt``, ``full_in.txt`` and ``full_out.txt``.

        Args:
            name: Name stored on the problem; not used to locate files.
            folder_path: Directory containing the files listed above.
        """
        return cls.from_files(
            name=name,
            description_path=folder_path / "statement.txt",
            sample_input_path=folder_path / "sample_in.txt",
            sample_output_path=folder_path / "sample_out.txt",
            input_path=folder_path / "full_in.txt",
            # Must be passed explicitly; otherwise from_files falls back to
            # "full_in.out", which is not the file this layout uses.
            output_path=folder_path / "full_out.txt",
        )

    @classmethod
    def from_files(cls, name: str, description_path: Path, sample_input_path: Path,
                   sample_output_path: Path, input_path: Path,
                   output_path: Optional[Path] = None):
        """Build a problem from explicit file paths.

        Args:
            name: Name of the problem.
            description_path: File whose text becomes ``problem_description``.
            sample_input_path: Path to the sample input.
            sample_output_path: Path to the sample output.
            input_path: Path to the full input; its parent directory becomes
                ``folder_path``.
            output_path: Path to the full output. When omitted, defaults to
                *input_path* with its suffix replaced by ``.out``.
        """
        return cls(
            name=name,
            problem_description=description_path.read_text(),
            sample_input_path=sample_input_path,
            sample_output_path=sample_output_path,
            input_path=input_path,
            output_path=output_path or input_path.with_suffix('.out'),
            folder_path=input_path.parent,
        )

    def _preview(self, path: Path, limit: int = 50) -> str:
        """Return the first *limit* characters of *path*, or a marker if unreadable."""
        try:
            return path.read_text()[:limit]
        except (OSError, UnicodeDecodeError):
            # __str__ is used for debugging output; it must not raise just
            # because a data file is missing or not decodable.
            return f"<unreadable: {path}>"

    def __str__(self):
        """Return a short multi-line summary of the problem."""
        # Path objects cannot be sliced, so preview the file contents instead
        # of slicing the path fields.
        return (
            f"Problem: {self.name}\n"
            f"Description: {self.problem_description[:50]}...\n"
            f"Sample Input: {self._preview(self.sample_input_path)}...\n"
            f"Sample Output: {self._preview(self.sample_output_path)}...\n"
            f"Input Path: {self.input_path}\n"
            f"Output Path: {self.output_path}\n"
            f"Images: {len(self.images)} image(s)\n"
        )
def find_problems(folder: Path) -> "list[Problem]":
    """Load every problem found under *folder*, searching recursively.

    Each file ending in ``.in`` marks one problem: the file stem is used as
    the problem name and the file's parent directory is passed to
    ``Problem.from_name``. Problems that fail to load are logged and skipped.

    NOTE(review): ``Problem.from_name`` reads ``statement.txt``,
    ``full_in.txt`` etc., none of which end in ``.in`` -- confirm the dataset
    still contains ``*.in`` files, otherwise nothing is discovered.

    Args:
        folder: Root directory to search.

    Returns:
        The successfully loaded ``Problem`` objects, in sorted path order of
        their ``.in`` files.
    """
    problems = []
    # rglob() already recurses, so no "**/" prefix is needed; sorting makes
    # the result order independent of directory-listing order.
    for input_file in sorted(folder.rglob("*.in")):
        # Bound outside the try so the error handler can always name it.
        problem_name = input_file.stem
        try:
            problems.append(Problem.from_name(problem_name, input_file.parent))
        except Exception as e:
            # Best effort: one broken problem must not abort the whole scan.
            logging.error("Error loading problem %s: %s", problem_name, e)
    logging.info("Found %d problems", len(problems))
    return problems
if __name__ == "__main__":
    # Smoke test 1: load a single practice problem by name and print it.
    practice_dir = Path("../dataset/2023/practice/")
    single_problem = Problem.from_name("cheeseburger_corollary_ch1", practice_dir)
    print(single_problem)

    # Smoke test 2: discover every problem below the 2023 dataset folder.
    dataset_dir = Path("../dataset/2023/")
    loaded = find_problems(dataset_dir)
    print(f"Found {len(loaded)} problems in folder: {dataset_dir}")
    assert len(loaded) == 29, f"Expected 29 problems, got {len(loaded)}"