1+ import os
2+ import yaml
3+ from pathlib import Path
4+ from typing import Dict , List , Union , Tuple
5+
6+
def infer_title_from_file_dir_name(filename: str) -> str:
    """
    Build a display title from a file or directory name.

    The extension (as split off by ``os.path.splitext``) is dropped, an
    all-digit prefix followed by an underscore is removed, the remaining
    underscores are turned into spaces, and the result is title-cased.

    Parameters
    ----------
    filename : str
        The file or directory name to derive the title from.

    Returns
    -------
    str
        The title-cased, human-readable name.
    """
    stem, _extension = os.path.splitext(filename)
    prefix, separator, remainder = stem.partition("_")

    # Strip the prefix only when it is purely numeric AND an underscore
    # follows it; a bare number such as "123" is kept as the title.
    if separator and prefix.isdigit():
        stem = remainder

    return stem.replace("_", " ").title()
26+
27+
def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]:
    """
    Infer report-component metadata from a file's extension and stem.

    The lower-cased suffix selects the component type:

    * image suffixes -> ``plot`` / ``static``
    * HTML and network-graph suffixes -> ``plot`` / ``interactive_network``
    * ``.json`` -> ``plot``; the plot type is ``plotly`` or ``altair`` when
      that word occurs in the lower-cased stem, otherwise ``unknown``
    * tabular suffixes -> ``dataframe`` with ``file_format`` set to the
      suffix without its dot, plus a ``delimiter`` for ``.csv`` and ``.txt``
    * ``.md`` -> ``markdown``
    * anything else -> ``unknown``

    Parameters
    ----------
    file : Path
        The file to analyze. Only its name is inspected; the file is not
        opened.

    Returns
    -------
    Dict[str, Union[str, None]]
        A dictionary containing the inferred metadata. ``component_type`` is
        always present; the other keys depend on the component type.
    """
    static_plot_exts = (".png", ".jpg", ".jpeg", ".gif")
    network_plot_exts = (".html", ".graphml", ".gml", ".gexf", ".cyjs")
    dataframe_exts = (".csv", ".txt", ".xls", ".xlsx", ".parquet")
    # The tab delimiter is stored as the two characters backslash + "t" (not a
    # real tab, and without the stray space the literal previously contained).
    # NOTE(review): presumably the config consumer interprets this escape
    # itself -- confirm against the code that reads the delimiter.
    delimiters = {".csv": ",", ".txt": "\\t"}

    ext = file.suffix.lower()
    metadata: Dict[str, Union[str, None]] = {}

    if ext in static_plot_exts:
        metadata["component_type"] = "plot"
        metadata["plot_type"] = "static"
    elif ext in network_plot_exts:
        metadata["component_type"] = "plot"
        metadata["plot_type"] = "interactive_network"
    elif ext == ".json":
        metadata["component_type"] = "plot"
        stem = file.stem.lower()
        # "plotly" takes precedence when both library names occur in the stem.
        if "plotly" in stem:
            metadata["plot_type"] = "plotly"
        elif "altair" in stem:
            metadata["plot_type"] = "altair"
        else:
            metadata["plot_type"] = "unknown"
    elif ext in dataframe_exts:
        metadata["component_type"] = "dataframe"
        metadata["file_format"] = ext.lstrip(".")
        if ext in delimiters:
            metadata["delimiter"] = delimiters[ext]
    elif ext == ".md":
        metadata["component_type"] = "markdown"
    else:
        metadata["component_type"] = "unknown"

    return metadata
73+
def sort_items_by_number_prefix(items: List[Path]) -> List[Path]:
    """
    Order paths by the numeric prefix of their names.

    A name whose text before the first underscore consists only of digits is
    ranked by that number. All other names are ranked after every numbered
    one. Ties are broken by the lower-cased name.

    Parameters
    ----------
    items : List[Path]
        The Path objects to order. The input list is not modified.

    Returns
    -------
    List[Path]
        A new list containing the same paths in sorted order.
    """
    def _ordering(entry: Path) -> tuple:
        leading, _sep, _rest = entry.name.partition("_")
        # Infinity ranks un-numbered names behind any integer prefix.
        rank = int(leading) if leading.isdigit() else float('inf')
        return rank, entry.name.lower()

    ordered = list(items)
    ordered.sort(key=_ordering)
    return ordered
97+
def generate_subsection_data(subsection_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]:
    """
    Build the configuration entry for one subsection folder.

    Each regular file directly inside ``subsection_folder`` becomes one
    component, in numeric-prefix order; subdirectories are skipped. A
    component carries a title inferred from the file name, the file's
    resolved absolute path, an empty description, and whatever metadata
    ``infer_component_metadata`` derives for it.

    Parameters
    ----------
    subsection_folder : Path
        Path to the subsection folder.
    base_folder : Path
        The report's base folder. It is accepted but not used by this
        function.

    Returns
    -------
    Dict[str, Union[str, List[Dict]]]
        The subsection data with ``title``, ``description`` and
        ``components`` keys.
    """
    ordered_entries = sort_items_by_number_prefix(list(subsection_folder.iterdir()))

    components = [
        {
            "title": infer_title_from_file_dir_name(entry.name),
            # Stored as an absolute path, not relative to base_folder.
            "file_path": str(entry.resolve()),
            "description": "",
            # Inferred metadata is merged last, after the fixed keys.
            **infer_component_metadata(entry),
        }
        for entry in ordered_entries
        if entry.is_file()
    ]

    return {
        "title": infer_title_from_file_dir_name(subsection_folder.name),
        "description": "",
        "components": components,
    }
143+
144+
def generate_section_data(section_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]:
    """
    Build the configuration entry for one section folder.

    Each directory directly inside ``section_folder`` is treated as a
    subsection, in numeric-prefix order; plain files at this level are
    skipped.

    Parameters
    ----------
    section_folder : Path
        Path to the section folder.
    base_folder : Path
        The report's base folder, passed through unchanged to
        ``generate_subsection_data``.

    Returns
    -------
    Dict[str, Union[str, List[Dict]]]
        The section data with ``title``, ``description`` and ``subsections``
        keys.
    """
    ordered_entries = sort_items_by_number_prefix(list(section_folder.iterdir()))

    subsections = [
        generate_subsection_data(entry, base_folder)
        for entry in ordered_entries
        if entry.is_dir()
    ]

    return {
        "title": infer_title_from_file_dir_name(section_folder.name),
        "description": "",
        "subsections": subsections,
    }
175+
176+
def resolve_base_folder(base_folder: str) -> Path:
    """
    Resolve the provided base folder to a normalised absolute path.

    A relative ``base_folder`` is interpreted relative to the directory two
    levels above this source file (the parent of the directory containing
    it). An absolute ``base_folder`` is used as given.

    Parameters
    ----------
    base_folder : str
        The relative or absolute path to the base folder.

    Returns
    -------
    Path
        The absolute path to the base folder, with ``..`` components and
        symlinks resolved so that its ``name`` is the real folder name.

    Raises
    ------
    ValueError
        If the resulting path does not exist or is not a directory.
    """
    project_dir = Path(__file__).resolve().parents[1]

    # Joining with an absolute path discards the left-hand side (pathlib
    # semantics), so absolute inputs pass through untouched.
    candidate = project_dir / base_folder

    if not candidate.is_dir():
        raise ValueError(f"Base folder '{base_folder}' does not exist or is not a directory.")

    # Normalise so that inputs such as "reports/.." do not yield a path whose
    # final component is "..".
    return candidate.resolve()
202+
203+
def generate_yaml_structure(folder: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]:
    """
    Build a YAML-compatible report structure from a folder hierarchy.

    The folder is resolved with ``resolve_base_folder``. Each directory
    directly inside it becomes a section, in numeric-prefix order; plain
    files at the top level are skipped.

    Parameters
    ----------
    folder : str
        The base folder containing section and subsection folders.

    Returns
    -------
    Tuple[Dict[str, Union[str, List[Dict]]], Path]
        The YAML-compatible structure (``report`` header plus ``sections``
        list) and the resolved folder path.
    """
    root = resolve_base_folder(folder)

    report_header = {
        "title": infer_title_from_file_dir_name(root.name),
        "description": "",
        "graphical_abstract": "",
        "logo": "",
    }

    ordered_entries = sort_items_by_number_prefix(list(root.iterdir()))
    sections = [
        generate_section_data(entry, root)
        for entry in ordered_entries
        if entry.is_dir()
    ]

    # "report" must precede "sections": the key order is the order in which
    # this dictionary is built.
    structure = {"report": report_header, "sections": sections}
    return structure, root
239+
def write_yaml_to_file(yaml_data: Dict, folder_path: Path) -> None:
    """
    Write the generated YAML structure to ``<folder>/<folder name>_config.yaml``.

    Parameters
    ----------
    yaml_data : Dict
        The YAML data to write.
    folder_path : Path
        The existing folder in which the YAML file is saved. The folder is
        never created by this function.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``yaml_data`` is not a dictionary.
    FileNotFoundError
        If ``folder_path`` does not exist.
    """
    # Validate with an explicit raise: an ``assert`` would be stripped when
    # Python runs with -O and the check would silently disappear.
    if not isinstance(yaml_data, dict):
        raise TypeError("YAML data must be a dictionary.")

    # Fail before doing any work if the target folder is missing.
    if not folder_path.exists():
        raise FileNotFoundError(f"The directory {folder_path} does not exist.")

    # The output file is named after the folder it is written into.
    output_yaml = folder_path / (folder_path.name + "_config.yaml")

    # sort_keys=False keeps the insertion order of the structure's keys.
    with open(output_yaml, "w", encoding="utf-8") as yaml_file:
        yaml.dump(yaml_data, yaml_file, default_flow_style=False, sort_keys=False)

    print(f"YAML file has been written to {output_yaml}")
0 commit comments