From f118d97f5573ac95a6667d133a432d70dac6c0e6 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 12:21:58 +0100 Subject: [PATCH 1/9] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor(quarto=5Frepo?= =?UTF-8?q?rtview):=20Update=20=5Fgenerate=5Fimage=5Fcontent=20method=20to?= =?UTF-8?q?=20use=20html=20code=20instead=20=20=20of=20md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was added to support absolute paths, the md code did not recognize absolute paths --- .gitignore | 4 ++-- vuegen/quarto_reportview.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 6f52628..343c466 100644 --- a/.gitignore +++ b/.gitignore @@ -127,5 +127,5 @@ UML_diagrams/ Graphical_abstract/ docs/presentations/ test.py -vuegen/yaml_generator.py -MicW2Graph_config.yaml \ No newline at end of file +vuegen/config_generator.py +example_data/MicW2Graph/MicW2Graph_config.yaml \ No newline at end of file diff --git a/vuegen/quarto_reportview.py b/vuegen/quarto_reportview.py index 954e9e2..219dc29 100644 --- a/vuegen/quarto_reportview.py +++ b/vuegen/quarto_reportview.py @@ -475,7 +475,7 @@ def _generate_markdown_content(self, markdown) -> List[str]: def _generate_image_content(self, image_path: str, alt_text: str = "", width: int = 650, height: int = 400) -> str: """ - Adds an image to the content list in a centered format with a specified width. + Adds an image to the content list in an HTML format with a specified width and height. Parameters ---------- @@ -484,20 +484,25 @@ def _generate_image_content(self, image_path: str, alt_text: str = "", width: in width : int, optional Width of the image in pixels (default is 650). height : int, optional - Height of the image in pixels (default is 500). + Height of the image in pixels (default is 400). alt_text : str, optional Alternative text for the image (default is an empty string). Returns ------- str - The formatted image content. + The formatted HTML image content. """ - # Check if the image path is a URL or a local file path if is_url(image_path): - return f"""![{alt_text}]({image_path}){{ width={width}px height={height}px fig-align="center"}}\n""" + src = image_path else: - return f"""![{alt_text}]({os.path.join('..', image_path)}){{ width={width}px height={height}px fig-align="center"}}\n""" + src = os.path.abspath(image_path) + + # Return the HTML content + return f""" +
+{alt_text} +
\n""" def _show_dataframe(self, dataframe, is_report_static, static_dir: str = STATIC_FILES_DIR) -> List[str]: """ From 621763542a6b9d7ceb2c759c8dbd869858c08252 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 12:42:25 +0100 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=9A=A7=20Test(config=5Fgenerator.py):?= =?UTF-8?q?=20Add=20config=20generator=20as=20a=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First version of the config generator as and independent module --- .gitignore | 4 +- .../MicW2Graph/MicW2Graph_config.yaml | 105 +++++++ vuegen/config_generator.py | 268 ++++++++++++++++++ 3 files changed, 374 insertions(+), 3 deletions(-) create mode 100644 example_data/MicW2Graph/MicW2Graph_config.yaml create mode 100644 vuegen/config_generator.py diff --git a/.gitignore b/.gitignore index 343c466..cc38bf7 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,4 @@ quarto_report/ UML_diagrams/ Graphical_abstract/ docs/presentations/ -test.py -vuegen/config_generator.py -example_data/MicW2Graph/MicW2Graph_config.yaml \ No newline at end of file +test.py \ No newline at end of file diff --git a/example_data/MicW2Graph/MicW2Graph_config.yaml b/example_data/MicW2Graph/MicW2Graph_config.yaml new file mode 100644 index 0000000..77a4337 --- /dev/null +++ b/example_data/MicW2Graph/MicW2Graph_config.yaml @@ -0,0 +1,105 @@ +report: + title: Micw2Graph + description: '' + graphical_abstract: '' + logo: '' +sections: +- title: Exploratory Data Analysis + description: '' + subsections: + - title: Abundance Data + description: '' + components: + - title: Top Species Plot Biome Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/1_top_species_plot_biome_plotly.json + description: '' + component_type: plot + plot_type: plotly + - title: Multilineplot Altair + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/2_multilineplot_altair.json + description: '' + component_type: plot + plot_type: altair + - title: Abundance Data Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/3_abundance_data_allbiomes.csv + description: '' + component_type: dataframe + file_format: csv + delimiter: ',' + - title: Abundance Data Allbiomes xls + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/4_abundance_data_allbiomes.xls + description: '' + component_type: dataframe + file_format: xls + - title: Sample Data + description: '' + components: + - title: Pie Plot Countries Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/1_pie_plot_countries_plotly.json + description: '' + component_type: plot + plot_type: plotly + - title: Pie Plots Biomes Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/2_pie_plots_biomes_plotly.json + description: '' + component_type: plot + plot_type: plotly + - title: Number Samples Per Study + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/3_number_samples_per_study.png + description: '' + component_type: plot + plot_type: static + - title: Sample Info Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/4_sample_info_allbiomes.parquet + description: '' + component_type: dataframe + file_format: parquet + - title: Sample Info Allbiomes txt + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/5_sample_info_allbiomes.txt + description: '' + component_type: dataframe + file_format: txt + delimiter: \t + - title: Extra Info + description: '' + components: + - title: Test Md + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/3_Extra_info/1_test_md.md + description: '' + component_type: markdown +- title: Microbial Association Networks + description: '' + subsections: + - title: Network Visualization1 + description: '' + components: + - title: Man Example + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/1_Network_visualization1/1_man_example.graphml + description: '' + component_type: plot + plot_type: interactive_network + - title: Network Visualization2 + description: '' + components: + - title: Man Example + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv + description: '' + component_type: dataframe + file_format: csv + delimiter: ',' + - title: Network Visualization3 + description: '' + components: + - title: Man Example + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/3_Network_visualization3/1_man_example.cyjs + description: '' + component_type: plot + plot_type: interactive_network + - title: Network Visualization4 + description: '' + components: + - title: Ckg Network + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/4_Network_visualization4/1_ckg_network.html + description: '' + component_type: plot + plot_type: interactive_network diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py new file mode 100644 index 0000000..f58e9d7 --- /dev/null +++ b/vuegen/config_generator.py @@ -0,0 +1,268 @@ +import os +import yaml +from pathlib import Path +from typing import Dict, List, Union, Tuple + + +def infer_title_from_file_dir_name(filename: str) -> str: + """ + Infers a human-readable title from a filename, removing leading numeric prefixes. + + Parameters + ---------- + filename : str + The filename to infer the title from. + + Returns + ------- + str + A human-readable title generated from the filename. + """ + # Remove leading numbers and underscores if they exist + name = os.path.splitext(filename)[0] + parts = name.split("_", 1) + title = parts[1] if parts[0].isdigit() and len(parts) > 1 else name + return title.replace("_", " ").title() + + +def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]: + """ + Infers metadata for a file, including component type, plot type, and additional fields. + + Parameters + ---------- + file : Path + The file to analyze. + + Returns + ------- + Dict[str, Union[str, None]] + A dictionary containing inferred metadata. + """ + ext = file.suffix.lower() + metadata = {} + + # Infer component type + if ext in [".png", ".jpg", ".jpeg", ".gif", ".html", ".graphml", ".gml", ".gexf", ".cyjs"]: + metadata["component_type"] = "plot" + if ext in [".png", ".jpg", ".jpeg", ".gif"]: + metadata["plot_type"] = "static" + else: + metadata["plot_type"] = "interactive_network" + elif ext == ".json": + metadata["component_type"] = "plot" + if "plotly" in file.stem.lower(): + metadata["plot_type"] = "plotly" + elif "altair" in file.stem.lower(): + metadata["plot_type"] = "altair" + else: + metadata["plot_type"] = "unknown" + elif ext in [".csv", ".txt", ".xls", ".xlsx", ".parquet"]: + metadata["component_type"] = "dataframe" + metadata["file_format"] = ext.lstrip(".") + if ext == ".csv": + metadata["delimiter"] = "," + elif ext == ".txt": + metadata["delimiter"] = "\\t" + elif ext == ".md": + metadata["component_type"] = "markdown" + else: + metadata["component_type"] = "unknown" + + return metadata + +def sort_items_by_number_prefix(items: List[Path]) -> List[Path]: + """ + Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end. + + Parameters + ---------- + items : List[Path] + The list of Path objects to sort. + + Returns + ------- + List[Path] + The sorted list of Path objects. + """ + def get_sort_key(item: Path) -> tuple: + parts = item.name.split("_", 1) + if parts[0].isdigit(): + numeric_prefix = int(parts[0]) + else: + numeric_prefix = float('inf') # Non-numeric prefixes go to the end + return numeric_prefix, item.name.lower() # Use `.lower()` for consistent sorting + + return sorted(items, key=get_sort_key) + +def generate_subsection_data(subsection_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Generates data for a single subsection. + + Parameters + ---------- + subsection_folder : Path + Path to the subsection folder. + base_folder : Path + The base folder path to ensure proper path calculation. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The subsection data. + """ + subsection_data = { + "title": infer_title_from_file_dir_name(subsection_folder.name), + "description": "", + "components": [], + } + + # Sort files by number prefix + sorted_files = sort_items_by_number_prefix(list(subsection_folder.iterdir())) + + for file in sorted_files: + if file.is_file(): + metadata = infer_component_metadata(file) + + # Ensure the file path is absolute and relative to base_folder + file_path = file.resolve() # Get the absolute path + + # The relative path from base_folder is now absolute to the folder structure + component_data = { + "title": infer_title_from_file_dir_name(file.name), + "file_path": str(file_path), # Use the absolute file path here + "description": "", + } + + # Merge inferred metadata into component data + component_data.update(metadata) + + subsection_data["components"].append(component_data) + + return subsection_data + + +def generate_section_data(section_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Generates data for a single section. + + Parameters + ---------- + section_folder : Path + Path to the section folder. + base_folder : Path + The base folder path to ensure proper path calculation. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The section data. + """ + section_data = { + "title": infer_title_from_file_dir_name(section_folder.name), + "description": "", + "subsections": [], + } + + # Sort subsections by number prefix + sorted_subsections = sort_items_by_number_prefix(list(section_folder.iterdir())) + + for subsection_folder in sorted_subsections: + if subsection_folder.is_dir(): + section_data["subsections"].append(generate_subsection_data(subsection_folder, base_folder)) + + return section_data + + +def resolve_base_folder(base_folder: str) -> Path: + """ + Resolves the provided base folder to an absolute path from the root, accounting for relative paths. + + Parameters + ---------- + base_folder : str + The relative or absolute path to the base folder. + + Returns + ------- + Path + The absolute path to the base folder. + """ + # Check if we are in a subdirectory and need to go up one level + project_dir = Path(__file__).resolve().parents[1] + + # If the base_folder is a relative path, resolve it from the project root + base_folder_path = project_dir / base_folder + + # Make sure the resolved base folder exists + if not base_folder_path.is_dir(): + raise ValueError(f"Base folder '{base_folder}' does not exist or is not a directory.") + + return base_folder_path + + +def generate_yaml_structure(folder: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: + """ + Generates a YAML-compatible structure from a folder hierarchy and returns the resolved folder path. + + Parameters + ---------- + folder : str + The base folder containing section and subsection folders. + + Returns + ------- + Tuple[Dict[str, Union[str, List[Dict]]], Path] + The YAML-compatible structure and the resolved folder path. + """ + folder_path = resolve_base_folder(folder) # Resolve the base folder path + + # Generate the YAML structure + yaml_structure = { + "report": { + "title": infer_title_from_file_dir_name(folder_path.name), + "description": "", + "graphical_abstract": "", + "logo": "", + }, + "sections": [], + } + + # Sort sections by their number prefix + sorted_sections = sort_items_by_number_prefix(list(folder_path.iterdir())) + + for section_folder in sorted_sections: + if section_folder.is_dir(): + yaml_structure["sections"].append(generate_section_data(section_folder, folder_path)) + + return yaml_structure, folder_path + +def write_yaml_to_file(yaml_data: Dict, folder_path: Path) -> None: + """ + Writes the generated YAML structure to a file. + + Parameters + ---------- + yaml_data : Dict + The YAML data to write. + folder_path : Path + The path where the YAML file should be saved. + + Returns + ------- + None + """ + assert isinstance(yaml_data, dict), "YAML data must be a dictionary." + + # Generate the output YAML file path based on the folder name + output_yaml = folder_path / (folder_path.name + "_config.yaml") + + # Ensure the directory exists (but don't create a new folder) + if not folder_path.exists(): + raise FileNotFoundError(f"The directory {folder_path} does not exist.") + + # Now write the YAML file + with open(output_yaml, "w") as yaml_file: + yaml.dump(yaml_data, yaml_file, default_flow_style=False, sort_keys=False) + + print(f"YAML file has been written to {output_yaml}") \ No newline at end of file From 6d0d40b3975e7c09c37ba4702f34603838be8b1b Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 13:50:41 +0100 Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=90=9B=20Fix(config=5Fgenerator):=20u?= =?UTF-8?q?pdate=20infer=5Fcomponent=5Fmetadata=20to=20identify=20edgelist?= =?UTF-8?q?=20and=20adjlist=20nets=20stored=20as=20csv=20or=20txt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...example.csv => 1_man_example_edgelist.csv} | 0 .../MicW2Graph/MicW2Graph_config.yaml | 14 +++++------ vuegen/config_generator.py | 25 +++++++++++++------ 3 files changed, 25 insertions(+), 14 deletions(-) rename example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/{1_man_example.csv => 1_man_example_edgelist.csv} (100%) diff --git a/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv b/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv similarity index 100% rename from example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv rename to example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv diff --git a/example_data/MicW2Graph/MicW2Graph_config.yaml b/example_data/MicW2Graph/MicW2Graph_config.yaml index 77a4337..ea53130 100644 --- a/example_data/MicW2Graph/MicW2Graph_config.yaml +++ b/example_data/MicW2Graph/MicW2Graph_config.yaml @@ -26,7 +26,7 @@ sections: component_type: dataframe file_format: csv delimiter: ',' - - title: Abundance Data Allbiomes xls + - title: Abundance Data Allbiomes file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/4_abundance_data_allbiomes.xls description: '' component_type: dataframe @@ -54,7 +54,7 @@ sections: description: '' component_type: dataframe file_format: parquet - - title: Sample Info Allbiomes txt + - title: Sample Info Allbiomes file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/5_sample_info_allbiomes.txt description: '' component_type: dataframe @@ -81,12 +81,12 @@ sections: - title: Network Visualization2 description: '' components: - - title: Man Example - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv + - title: Man Example Edgelist + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv description: '' - component_type: dataframe - file_format: csv - delimiter: ',' + component_type: plot + plot_type: interactive_network + csv_network_format: edgelist - title: Network Visualization3 description: '' components: diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py index f58e9d7..75440e7 100644 --- a/vuegen/config_generator.py +++ b/vuegen/config_generator.py @@ -57,13 +57,23 @@ def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]: metadata["plot_type"] = "altair" else: metadata["plot_type"] = "unknown" - elif ext in [".csv", ".txt", ".xls", ".xlsx", ".parquet"]: + elif ext in [".csv", ".txt"]: + # Check for network-related keywords + if "edgelist" in file.stem.lower(): + metadata["component_type"] = "plot" + metadata["plot_type"] = "interactive_network" + metadata["csv_network_format"] = "edgelist" + elif "adjlist" in file.stem.lower(): + metadata["component_type"] = "plot" + metadata["plot_type"] = "interactive_network" + metadata["csv_network_format"] = "adjlist" + else: + metadata["component_type"] = "dataframe" + metadata["file_format"] = ext.lstrip(".") + metadata["delimiter"] = "," if ext == ".csv" else "\\t" + elif ext in [".xls", ".xlsx", ".parquet"]: metadata["component_type"] = "dataframe" metadata["file_format"] = ext.lstrip(".") - if ext == ".csv": - metadata["delimiter"] = "," - elif ext == ".txt": - metadata["delimiter"] = "\\t" elif ext == ".md": metadata["component_type"] = "markdown" else: @@ -90,8 +100,9 @@ def get_sort_key(item: Path) -> tuple: if parts[0].isdigit(): numeric_prefix = int(parts[0]) else: - numeric_prefix = float('inf') # Non-numeric prefixes go to the end - return numeric_prefix, item.name.lower() # Use `.lower()` for consistent sorting + # Non-numeric prefixes go to the end + numeric_prefix = float('inf') + return numeric_prefix, item.name.lower() return sorted(items, key=get_sort_key) From 4a8512ca79ba9172331a42c245db6bff3764dbe3 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 15:12:57 +0100 Subject: [PATCH 4/9] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor(config=5Fgene?= =?UTF-8?q?rator):=20Update=20infer=5Fcomponent=5Fmetadata=20function=20to?= =?UTF-8?q?=20use=20Enums=20for=20different=20component=20types?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vuegen/config_generator.py | 75 +++++++++++++++++++++++--------------- vuegen/report.py | 4 ++ 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py index 75440e7..1a9056f 100644 --- a/vuegen/config_generator.py +++ b/vuegen/config_generator.py @@ -1,6 +1,7 @@ import os import yaml from pathlib import Path +import report as r from typing import Dict, List, Union, Tuple @@ -25,7 +26,7 @@ def infer_title_from_file_dir_name(filename: str) -> str: return title.replace("_", " ").title() -def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]: +def infer_component_metadata(file: Path, logger=None) -> Dict[str, Union[str, None]]: """ Infers metadata for a file, including component type, plot type, and additional fields. @@ -33,6 +34,8 @@ def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]: ---------- file : Path The file to analyze. + logger : optional + Logger to record errors and warnings. Returns ------- @@ -42,42 +45,56 @@ def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]: ext = file.suffix.lower() metadata = {} - # Infer component type - if ext in [".png", ".jpg", ".jpeg", ".gif", ".html", ".graphml", ".gml", ".gexf", ".cyjs"]: - metadata["component_type"] = "plot" - if ext in [".png", ".jpg", ".jpeg", ".gif"]: - metadata["plot_type"] = "static" + # Infer component type and metadata + if ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: + # Check for network-related keywords + if "edgelist" in file.stem.lower(): + metadata["component_type"] = r.ComponentType.PLOT.value + metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + metadata["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value + elif "adjlist" in file.stem.lower(): + metadata["component_type"] = r.ComponentType.PLOT.value + metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + metadata["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value + else: + metadata["component_type"] = r.ComponentType.DATAFRAME.value + metadata["file_format"] = r.DataFrameFormat.CSV.value if ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value + metadata["delimiter"] = "," if ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" + elif ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: + metadata["component_type"] = r.ComponentType.DATAFRAME.value + metadata["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == ext) + elif ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: + metadata["component_type"] = r.ComponentType.PLOT.value + if ext in [ + r.NetworkFormat.PNG.value_with_dot, + r.NetworkFormat.JPG.value_with_dot, + r.NetworkFormat.JPEG.value_with_dot, + r.NetworkFormat.SVG.value_with_dot, + ]: + metadata["plot_type"] = r.PlotType.STATIC.value else: - metadata["plot_type"] = "interactive_network" + metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value elif ext == ".json": - metadata["component_type"] = "plot" + metadata["component_type"] = r.ComponentType.PLOT.value if "plotly" in file.stem.lower(): - metadata["plot_type"] = "plotly" + metadata["plot_type"] = r.PlotType.PLOTLY.value elif "altair" in file.stem.lower(): - metadata["plot_type"] = "altair" + metadata["plot_type"] = r.PlotType.ALTAIR.value else: metadata["plot_type"] = "unknown" - elif ext in [".csv", ".txt"]: - # Check for network-related keywords - if "edgelist" in file.stem.lower(): - metadata["component_type"] = "plot" - metadata["plot_type"] = "interactive_network" - metadata["csv_network_format"] = "edgelist" - elif "adjlist" in file.stem.lower(): - metadata["component_type"] = "plot" - metadata["plot_type"] = "interactive_network" - metadata["csv_network_format"] = "adjlist" - else: - metadata["component_type"] = "dataframe" - metadata["file_format"] = ext.lstrip(".") - metadata["delimiter"] = "," if ext == ".csv" else "\\t" - elif ext in [".xls", ".xlsx", ".parquet"]: - metadata["component_type"] = "dataframe" - metadata["file_format"] = ext.lstrip(".") elif ext == ".md": - metadata["component_type"] = "markdown" + metadata["component_type"] = r.ComponentType.MARKDOWN.value else: - metadata["component_type"] = "unknown" + # Unified error for unsupported extensions + error_msg = ( + f"Unsupported file extension: {ext}. " + f"Supported extensions include:\n" + f" - Network formats: {', '.join(fmt.value_with_dot for fmt in r.NetworkFormat)}\n" + f" - DataFrame formats: {', '.join(fmt.value_with_dot for fmt in r.DataFrameFormat)}" + ) + if logger: + logger.error(error_msg) + raise ValueError(error_msg) return metadata diff --git a/vuegen/report.py b/vuegen/report.py index a85830b..3926751 100644 --- a/vuegen/report.py +++ b/vuegen/report.py @@ -44,6 +44,10 @@ class NetworkFormat(StrEnum): TXT = auto() CYJS = auto() HTML = auto() + PNG = auto() + JPG = auto() + JPEG = auto() + SVG = auto() @property def value_with_dot(self): From 8a389441f57b6711ae3adc614020a239e06d8f33 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 15:34:38 +0100 Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=90=9B=20Fix(report):=20Add=20XLSX=20?= =?UTF-8?q?format=20in=20DataFrameFormat=20Enum=20and=20update=20st=20and?= =?UTF-8?q?=20quarto=20reports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vuegen/config_generator.py | 8 +++----- vuegen/quarto_reportview.py | 3 ++- vuegen/report.py | 1 + vuegen/streamlit_reportview.py | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py index 1a9056f..56065f6 100644 --- a/vuegen/config_generator.py +++ b/vuegen/config_generator.py @@ -26,7 +26,7 @@ def infer_title_from_file_dir_name(filename: str) -> str: return title.replace("_", " ").title() -def infer_component_metadata(file: Path, logger=None) -> Dict[str, Union[str, None]]: +def infer_component_metadata(file: Path, logger=None) -> Dict[str, str]: """ Infers metadata for a file, including component type, plot type, and additional fields. @@ -39,7 +39,7 @@ def infer_component_metadata(file: Path, logger=None) -> Dict[str, Union[str, No Returns ------- - Dict[str, Union[str, None]] + Dict[str,str] A dictionary containing inferred metadata. """ ext = file.suffix.lower() @@ -123,7 +123,7 @@ def get_sort_key(item: Path) -> tuple: return sorted(items, key=get_sort_key) -def generate_subsection_data(subsection_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]: +def generate_subsection_data(subsection_folder: Path) -> Dict[str, Union[str, List[Dict]]]: """ Generates data for a single subsection. @@ -131,8 +131,6 @@ def generate_subsection_data(subsection_folder: Path, base_folder: Path) -> Dict ---------- subsection_folder : Path Path to the subsection folder. - base_folder : Path - The base folder path to ensure proper path calculation. Returns ------- diff --git a/vuegen/quarto_reportview.py b/vuegen/quarto_reportview.py index 219dc29..2bfd862 100644 --- a/vuegen/quarto_reportview.py +++ b/vuegen/quarto_reportview.py @@ -396,7 +396,8 @@ def _generate_dataframe_content(self, dataframe, is_report_static) -> List[str]: r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, r.DataFrameFormat.TXT.value_with_dot: pd.read_table, - r.DataFrameFormat.XLS.value_with_dot: pd.read_excel + r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, + r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel } try: # Check if the file extension matches any DataFrameFormat value diff --git a/vuegen/report.py b/vuegen/report.py index 3926751..f9b343b 100644 --- a/vuegen/report.py +++ b/vuegen/report.py @@ -63,6 +63,7 @@ class DataFrameFormat(StrEnum): TXT = auto() PARQUET = auto() XLS = auto() + XLSX = auto() @property def value_with_dot(self): diff --git a/vuegen/streamlit_reportview.py b/vuegen/streamlit_reportview.py index a64b178..6da24ce 100644 --- a/vuegen/streamlit_reportview.py +++ b/vuegen/streamlit_reportview.py @@ -429,7 +429,8 @@ def _generate_dataframe_content(self, dataframe) -> List[str]: r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, r.DataFrameFormat.TXT.value_with_dot: pd.read_table, - r.DataFrameFormat.XLS.value_with_dot: pd.read_excel + r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, + r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel } try: From d81f9b38c3fab1837307d679e5d2f715090b6c35 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Mon, 9 Dec 2024 16:12:11 +0100 Subject: [PATCH 6/9] =?UTF-8?q?=F0=9F=9A=9A=20Refactor(config=5Fgenerator)?= =?UTF-8?q?:=20Move=20write=5Fyaml=5Fconfig=20function=20to=20utils?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vuegen/config_generator.py | 35 ++--------------------------------- vuegen/utils.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py index 56065f6..e6f33cb 100644 --- a/vuegen/config_generator.py +++ b/vuegen/config_generator.py @@ -1,5 +1,4 @@ import os -import yaml from pathlib import Path import report as r from typing import Dict, List, Union, Tuple @@ -195,7 +194,7 @@ def generate_section_data(section_folder: Path, base_folder: Path) -> Dict[str, for subsection_folder in sorted_subsections: if subsection_folder.is_dir(): - section_data["subsections"].append(generate_subsection_data(subsection_folder, base_folder)) + section_data["subsections"].append(generate_subsection_data(subsection_folder)) return section_data @@ -261,34 +260,4 @@ def generate_yaml_structure(folder: str) -> Tuple[Dict[str, Union[str, List[Dict if section_folder.is_dir(): yaml_structure["sections"].append(generate_section_data(section_folder, folder_path)) - return yaml_structure, folder_path - -def write_yaml_to_file(yaml_data: Dict, folder_path: Path) -> None: - """ - Writes the generated YAML structure to a file. - - Parameters - ---------- - yaml_data : Dict - The YAML data to write. - folder_path : Path - The path where the YAML file should be saved. - - Returns - ------- - None - """ - assert isinstance(yaml_data, dict), "YAML data must be a dictionary." - - # Generate the output YAML file path based on the folder name - output_yaml = folder_path / (folder_path.name + "_config.yaml") - - # Ensure the directory exists (but don't create a new folder) - if not folder_path.exists(): - raise FileNotFoundError(f"The directory {folder_path} does not exist.") - - # Now write the YAML file - with open(output_yaml, "w") as yaml_file: - yaml.dump(yaml_data, yaml_file, default_flow_style=False, sort_keys=False) - - print(f"YAML file has been written to {output_yaml}") \ No newline at end of file + return yaml_structure, folder_path \ No newline at end of file diff --git a/vuegen/utils.py b/vuegen/utils.py index 69429ed..802547f 100644 --- a/vuegen/utils.py +++ b/vuegen/utils.py @@ -11,6 +11,7 @@ from enum import StrEnum from typing import Type from bs4 import BeautifulSoup +from pathlib import Path from urllib.parse import urlparse ## CHECKS @@ -419,6 +420,34 @@ def load_yaml_config(file_path: str) -> dict: return config +def write_yaml_config(yaml_data: dict, directory_path: Path) -> None: + """ + Writes the generated YAML structure to a file. + + Parameters + ---------- + yaml_data : dict + The YAML data to write. + directory_path : Path + The path where the YAML file should be saved. + + Returns + ------- + None + """ + assert isinstance(yaml_data, dict), "YAML data must be a dictionary." + + # Generate the output YAML file path based on the folder name + output_yaml = directory_path / (directory_path.name + "_config.yaml") + + # Ensure the directory exists (but don't create a new folder) + if not directory_path.exists(): + raise FileNotFoundError(f"The directory {directory_path} does not exist.") + + # Now write the YAML file + with open(output_yaml, "w") as yaml_file: + yaml.dump(yaml_data, yaml_file, default_flow_style=False, sort_keys=False) + ## LOGGING def get_basename(fname: None | str = None) -> str: """ From 1abfa2727044823fd29891dd67edbcda776fb442 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Tue, 10 Dec 2024 12:58:32 +0100 Subject: [PATCH 7/9] =?UTF-8?q?=F0=9F=8E=A8=20Style(config=5Fgenerator):?= =?UTF-8?q?=20Change=20function=20names,=20parameters=20and=20internal=20v?= =?UTF-8?q?ariables=20to=20resemble=20config=5Fmanager?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MicW2Graph/MicW2Graph_config.yaml | 86 +++---- vuegen/config_generator.py | 236 +++++++++--------- vuegen/config_manager.py | 3 + 3 files changed, 162 insertions(+), 163 deletions(-) diff --git a/example_data/MicW2Graph/MicW2Graph_config.yaml b/example_data/MicW2Graph/MicW2Graph_config.yaml index ea53130..cd2cc51 100644 --- a/example_data/MicW2Graph/MicW2Graph_config.yaml +++ b/example_data/MicW2Graph/MicW2Graph_config.yaml @@ -10,96 +10,96 @@ sections: - title: Abundance Data description: '' components: - - title: Top Species Plot Biome Plotly - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/1_top_species_plot_biome_plotly.json - description: '' - component_type: plot + - component_type: plot plot_type: plotly - - title: Multilineplot Altair - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/2_multilineplot_altair.json + title: Top Species Plot Biome Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/1_top_species_plot_biome_plotly.json description: '' - component_type: plot + - component_type: plot plot_type: altair - - title: Abundance Data Allbiomes - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/3_abundance_data_allbiomes.csv + title: Multilineplot Altair + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/2_multilineplot_altair.json description: '' - component_type: dataframe + - component_type: dataframe file_format: csv delimiter: ',' - - title: Abundance Data Allbiomes - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/4_abundance_data_allbiomes.xls + title: Abundance Data Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/3_abundance_data_allbiomes.csv description: '' - component_type: dataframe + - component_type: dataframe file_format: xls + title: Abundance Data Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/4_abundance_data_allbiomes.xls + description: '' - title: Sample Data description: '' components: - - title: Pie Plot Countries Plotly + - component_type: plot + plot_type: plotly + title: Pie Plot Countries Plotly file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/1_pie_plot_countries_plotly.json description: '' - component_type: plot + - component_type: plot plot_type: plotly - - title: Pie Plots Biomes Plotly + title: Pie Plots Biomes Plotly file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/2_pie_plots_biomes_plotly.json description: '' - component_type: plot - plot_type: plotly - - title: Number Samples Per Study - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/3_number_samples_per_study.png - description: '' - component_type: plot + - component_type: plot plot_type: static - - title: Sample Info Allbiomes - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/4_sample_info_allbiomes.parquet + title: Number Samples Per Study + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/3_number_samples_per_study.png description: '' - component_type: dataframe + - component_type: dataframe file_format: parquet - - title: Sample Info Allbiomes - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/5_sample_info_allbiomes.txt + title: Sample Info Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/4_sample_info_allbiomes.parquet description: '' - component_type: dataframe + - component_type: dataframe file_format: txt delimiter: \t + title: Sample Info Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/5_sample_info_allbiomes.txt + description: '' - title: Extra Info description: '' components: - - title: Test Md + - component_type: markdown + title: Test Md file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/3_Extra_info/1_test_md.md description: '' - component_type: markdown - title: Microbial Association Networks description: '' subsections: - title: Network Visualization1 description: '' components: - - title: Man Example + - component_type: plot + plot_type: interactive_network + title: Man Example file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/1_Network_visualization1/1_man_example.graphml description: '' - component_type: plot - plot_type: interactive_network - title: Network Visualization2 description: '' components: - - title: Man Example Edgelist - file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv - description: '' - component_type: plot + - component_type: plot plot_type: interactive_network csv_network_format: edgelist + title: Man Example Edgelist + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv + description: '' - title: Network Visualization3 description: '' components: - - title: Man Example + - component_type: plot + plot_type: interactive_network + title: Man Example file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/3_Network_visualization3/1_man_example.cyjs description: '' - component_type: plot - plot_type: interactive_network - title: Network Visualization4 description: '' components: - - title: Ckg Network + - component_type: plot + plot_type: interactive_network + title: Ckg Network file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/4_Network_visualization4/1_ckg_network.html description: '' - component_type: plot - plot_type: interactive_network diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py index e6f33cb..8932034 100644 --- a/vuegen/config_generator.py +++ b/vuegen/config_generator.py @@ -3,107 +3,105 @@ import report as r from typing import Dict, List, Union, Tuple - -def infer_title_from_file_dir_name(filename: str) -> str: +def _create_title_fromdir(file_dirname: str) -> str: """ - Infers a human-readable title from a filename, removing leading numeric prefixes. + Infers title from a file or directory, removing leading numeric prefixes. Parameters ---------- - filename : str - The filename to infer the title from. + file_dirname : str + The file or directory name to infer the title from. Returns ------- str - A human-readable title generated from the filename. + A title generated from the file or directory name. """ # Remove leading numbers and underscores if they exist - name = os.path.splitext(filename)[0] + name = os.path.splitext(file_dirname)[0] parts = name.split("_", 1) title = parts[1] if parts[0].isdigit() and len(parts) > 1 else name return title.replace("_", " ").title() - -def infer_component_metadata(file: Path, logger=None) -> Dict[str, str]: +def _create_component_config_fromfile(file_path: Path) -> Dict[str, str]: """ - Infers metadata for a file, including component type, plot type, and additional fields. + Infers a component config from a file, including component type, plot type, and additional fields. Parameters ---------- - file : Path - The file to analyze. - logger : optional - Logger to record errors and warnings. + file_path : Path + The file path to analyze. Returns ------- - Dict[str,str] - A dictionary containing inferred metadata. + component_config : Dict[str, str] + A dictionary containing inferred component configuration. """ - ext = file.suffix.lower() - metadata = {} - - # Infer component type and metadata - if ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: - # Check for network-related keywords - if "edgelist" in file.stem.lower(): - metadata["component_type"] = r.ComponentType.PLOT.value - metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - metadata["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value - elif "adjlist" in file.stem.lower(): - metadata["component_type"] = r.ComponentType.PLOT.value - metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - metadata["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value + file_ext = file_path.suffix.lower() + component_config = {} + + # Infer component config + if file_ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: + # Check for CSVNetworkFormat keywords + if "edgelist" in file_path.stem.lower(): + component_config["component_type"] = r.ComponentType.PLOT.value + component_config["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value + elif "adjlist" in file_path.stem.lower(): + component_config ["component_type"] = r.ComponentType.PLOT.value + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value + # Fill the config with dataframe content else: - metadata["component_type"] = r.ComponentType.DATAFRAME.value - metadata["file_format"] = r.DataFrameFormat.CSV.value if ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value - metadata["delimiter"] = "," if ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" - elif ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: - metadata["component_type"] = r.ComponentType.DATAFRAME.value - metadata["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == ext) - elif ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: - metadata["component_type"] = r.ComponentType.PLOT.value - if ext in [ + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = r.DataFrameFormat.CSV.value if file_ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value + component_config ["delimiter"] = "," if file_ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" + # Check other DataframeFormats than csv and txt + elif file_ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == file_ext) + # Check for network formats + elif file_ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: + component_config ["component_type"] = r.ComponentType.PLOT.value + if file_ext in [ r.NetworkFormat.PNG.value_with_dot, r.NetworkFormat.JPG.value_with_dot, r.NetworkFormat.JPEG.value_with_dot, r.NetworkFormat.SVG.value_with_dot, ]: - metadata["plot_type"] = r.PlotType.STATIC.value + component_config ["plot_type"] = r.PlotType.STATIC.value else: - metadata["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - elif ext == ".json": - metadata["component_type"] = r.ComponentType.PLOT.value - if "plotly" in file.stem.lower(): - metadata["plot_type"] = r.PlotType.PLOTLY.value - elif "altair" in file.stem.lower(): - metadata["plot_type"] = r.PlotType.ALTAIR.value + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + # Check for interactive plots + elif file_ext == ".json": + component_config ["component_type"] = r.ComponentType.PLOT.value + if "plotly" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.PLOTLY.value + elif "altair" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.ALTAIR.value else: - metadata["plot_type"] = "unknown" - elif ext == ".md": - metadata["component_type"] = r.ComponentType.MARKDOWN.value + component_config ["plot_type"] = "unknown" + elif file_ext == ".md": + component_config ["component_type"] = r.ComponentType.MARKDOWN.value else: - # Unified error for unsupported extensions error_msg = ( - f"Unsupported file extension: {ext}. " + f"Unsupported file extension: {file_ext}. " f"Supported extensions include:\n" f" - Network formats: {', '.join(fmt.value_with_dot for fmt in r.NetworkFormat)}\n" f" - DataFrame formats: {', '.join(fmt.value_with_dot for fmt in r.DataFrameFormat)}" ) - if logger: - logger.error(error_msg) + #self.logger.error(error_msg) raise ValueError(error_msg) - return metadata + return component_config -def sort_items_by_number_prefix(items: List[Path]) -> List[Path]: +def _sort_paths_by_numprefix(paths: List[Path]) -> List[Path]: """ Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end. Parameters ---------- - items : List[Path] + paths : List[Path] The list of Path objects to sort. Returns @@ -111,141 +109,138 @@ def sort_items_by_number_prefix(items: List[Path]) -> List[Path]: List[Path] The sorted list of Path objects. """ - def get_sort_key(item: Path) -> tuple: - parts = item.name.split("_", 1) + def get_sort_key(path: Path) -> tuple: + parts = path.name.split("_", 1) if parts[0].isdigit(): numeric_prefix = int(parts[0]) else: # Non-numeric prefixes go to the end numeric_prefix = float('inf') - return numeric_prefix, item.name.lower() + return numeric_prefix, path.name.lower() - return sorted(items, key=get_sort_key) + return sorted(paths, key=get_sort_key) -def generate_subsection_data(subsection_folder: Path) -> Dict[str, Union[str, List[Dict]]]: +def _create_subsect_config_fromdir(subsection_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: """ - Generates data for a single subsection. + Creates subsection config from a directory. Parameters ---------- - subsection_folder : Path - Path to the subsection folder. + subsection_dir_path : Path + Path to the subsection directory. Returns ------- Dict[str, Union[str, List[Dict]]] - The subsection data. + The subsection config. """ - subsection_data = { - "title": infer_title_from_file_dir_name(subsection_folder.name), + subsection_config = { + "title": _create_title_fromdir(subsection_dir_path.name), "description": "", "components": [], } # Sort files by number prefix - sorted_files = sort_items_by_number_prefix(list(subsection_folder.iterdir())) + sorted_files = _sort_paths_by_numprefix(list(subsection_dir_path.iterdir())) for file in sorted_files: if file.is_file(): - metadata = infer_component_metadata(file) + component_config = _create_component_config_fromfile(file) - # Ensure the file path is absolute and relative to base_folder - file_path = file.resolve() # Get the absolute path + # Ensure the file path is absolute + file_path = file.resolve() - # The relative path from base_folder is now absolute to the folder structure - component_data = { - "title": infer_title_from_file_dir_name(file.name), - "file_path": str(file_path), # Use the absolute file path here + component_config_updt = { + "title": _create_title_fromdir(file.name), + "file_path": str(file_path), "description": "", } - # Merge inferred metadata into component data - component_data.update(metadata) - - subsection_data["components"].append(component_data) + # Update inferred config information + component_config.update(component_config_updt) - return subsection_data + subsection_config["components"].append(component_config) + return subsection_config -def generate_section_data(section_folder: Path, base_folder: Path) -> Dict[str, Union[str, List[Dict]]]: +def _create_sect_config_fromdir(section_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: """ - Generates data for a single section. + Creates section config from a directory. Parameters ---------- - section_folder : Path - Path to the section folder. - base_folder : Path - The base folder path to ensure proper path calculation. + section_dir_path : Path + Path to the section directory. Returns ------- Dict[str, Union[str, List[Dict]]] - The section data. + The section config. """ - section_data = { - "title": infer_title_from_file_dir_name(section_folder.name), + section_config = { + "title": _create_title_fromdir(section_dir_path.name), "description": "", "subsections": [], } # Sort subsections by number prefix - sorted_subsections = sort_items_by_number_prefix(list(section_folder.iterdir())) + sorted_subsections = _sort_paths_by_numprefix(list(section_dir_path.iterdir())) - for subsection_folder in sorted_subsections: - if subsection_folder.is_dir(): - section_data["subsections"].append(generate_subsection_data(subsection_folder)) + for subsection_dir in sorted_subsections: + if subsection_dir.is_dir(): + section_config["subsections"].append(_create_subsect_config_fromdir(subsection_dir)) - return section_data + return section_config -def resolve_base_folder(base_folder: str) -> Path: +def _resolve_base_dir(base_dir: str) -> Path: """ - Resolves the provided base folder to an absolute path from the root, accounting for relative paths. + Resolves the provided base directory to an absolute path from the root, accounting for relative paths. Parameters ---------- - base_folder : str - The relative or absolute path to the base folder. + base_dir : str + The relative or absolute path to the base directory. Returns ------- Path - The absolute path to the base folder. + The absolute path to the base directory. """ # Check if we are in a subdirectory and need to go up one level project_dir = Path(__file__).resolve().parents[1] - # If the base_folder is a relative path, resolve it from the project root - base_folder_path = project_dir / base_folder + # If the base_dir is a relative path, resolve it from the project root + base_dir_path = project_dir / base_dir - # Make sure the resolved base folder exists - if not base_folder_path.is_dir(): - raise ValueError(f"Base folder '{base_folder}' does not exist or is not a directory.") + # Make sure the resolved base directory exists + if not base_dir_path.is_dir(): + raise ValueError(f"Base directory '{base_dir}' does not exist or is not a directory.") - return base_folder_path + return base_dir_path -def generate_yaml_structure(folder: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: +def create_yamlconfig_fromdir(base_dir: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: """ - Generates a YAML-compatible structure from a folder hierarchy and returns the resolved folder path. + Generates a YAML-compatible config file from a directory. It also returns the resolved folder path. Parameters ---------- - folder : str - The base folder containing section and subsection folders. + base_dir : str + The base directory containing section and subsection folders. Returns ------- Tuple[Dict[str, Union[str, List[Dict]]], Path] - The YAML-compatible structure and the resolved folder path. + The YAML config and the resolved directory path. """ - folder_path = resolve_base_folder(folder) # Resolve the base folder path + # Get absolute path from base directory + base_dir_path = _resolve_base_dir(base_dir) - # Generate the YAML structure - yaml_structure = { + # Generate the YAML config + yaml_config = { "report": { - "title": infer_title_from_file_dir_name(folder_path.name), + "title": _create_title_fromdir(base_dir_path.name), "description": "", "graphical_abstract": "", "logo": "", @@ -254,10 +249,11 @@ def generate_yaml_structure(folder: str) -> Tuple[Dict[str, Union[str, List[Dict } # Sort sections by their number prefix - sorted_sections = sort_items_by_number_prefix(list(folder_path.iterdir())) + sorted_sections = _sort_paths_by_numprefix(list(base_dir_path.iterdir())) - for section_folder in sorted_sections: - if section_folder.is_dir(): - yaml_structure["sections"].append(generate_section_data(section_folder, folder_path)) + # Generate sections and subsections config + for section_dir in sorted_sections: + if section_dir.is_dir(): + yaml_config["sections"].append(_create_sect_config_fromdir(section_dir)) - return yaml_structure, folder_path \ No newline at end of file + return yaml_config, base_dir_path \ No newline at end of file diff --git a/vuegen/config_manager.py b/vuegen/config_manager.py index d06f9f5..bb09b06 100644 --- a/vuegen/config_manager.py +++ b/vuegen/config_manager.py @@ -1,4 +1,7 @@ +import os import report as r +from pathlib import Path +from typing import Dict, List, Union, Tuple from utils import get_logger, assert_enum_value class ConfigManager: From 596c2044cef26052cd0ce65f330c936103e745f8 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Tue, 10 Dec 2024 16:13:32 +0100 Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=90=9B=20Fix(quarto=5Freportview):=20?= =?UTF-8?q?Add=20id=20to=20the=20label=20chunks=20in=20the=20generated=20q?= =?UTF-8?q?md=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vuegen/quarto_reportview.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vuegen/quarto_reportview.py b/vuegen/quarto_reportview.py index 2bfd862..9b98e27 100644 --- a/vuegen/quarto_reportview.py +++ b/vuegen/quarto_reportview.py @@ -332,7 +332,7 @@ def _generate_plot_code(self, plot, output_file = "") -> str: """ # Initialize plot code with common structure plot_code = f"""```{{python}} -#| label: '{plot.title}' +#| label: '{plot.title} {plot.id}' #| fig-cap: "" """ # If the file path is a URL, generate code to fetch content via requests @@ -388,7 +388,7 @@ def _generate_dataframe_content(self, dataframe, is_report_static) -> List[str]: # Append header for DataFrame loading dataframe_content.append(f"""```{{python}} -#| label: '{dataframe.title}' +#| label: '{dataframe.title} {dataframe.id}' #| fig-cap: "" """) # Mapping of file extensions to read functions @@ -447,7 +447,7 @@ def _generate_markdown_content(self, markdown) -> List[str]: # Initialize md code with common structure markdown_content.append(f""" ```{{python}} -#| label: '{markdown.title}' +#| label: '{markdown.title} {markdown.id}' #| fig-cap: ""\n""") # If the file path is a URL, generate code to fetch content via requests if is_url(markdown.file_path): From 19d7e824d99b12fe5ddb97efebdbe4b5e886064a Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Wed, 11 Dec 2024 16:51:37 +0100 Subject: [PATCH 9/9] =?UTF-8?q?=E2=9C=A8=20Feat(config=5Fmanager):=20Add?= =?UTF-8?q?=20code=20to=20create=20config=20file=20from=20a=20directory=20?= =?UTF-8?q?in=20config=5Fmanager.=20Closes:=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code that was before in an independent module was added to config_manager. So, this class controls all config stuff. --- vuegen/config_generator.py | 259 ------------------------------------- vuegen/config_manager.py | 255 ++++++++++++++++++++++++++++++++++++ vuegen/main.py | 25 +++- vuegen/report_generator.py | 28 ++-- vuegen/utils.py | 26 +++- 5 files changed, 312 insertions(+), 281 deletions(-) delete mode 100644 vuegen/config_generator.py diff --git a/vuegen/config_generator.py b/vuegen/config_generator.py deleted file mode 100644 index 8932034..0000000 --- a/vuegen/config_generator.py +++ /dev/null @@ -1,259 +0,0 @@ -import os -from pathlib import Path -import report as r -from typing import Dict, List, Union, Tuple - -def _create_title_fromdir(file_dirname: str) -> str: - """ - Infers title from a file or directory, removing leading numeric prefixes. - - Parameters - ---------- - file_dirname : str - The file or directory name to infer the title from. - - Returns - ------- - str - A title generated from the file or directory name. - """ - # Remove leading numbers and underscores if they exist - name = os.path.splitext(file_dirname)[0] - parts = name.split("_", 1) - title = parts[1] if parts[0].isdigit() and len(parts) > 1 else name - return title.replace("_", " ").title() - -def _create_component_config_fromfile(file_path: Path) -> Dict[str, str]: - """ - Infers a component config from a file, including component type, plot type, and additional fields. - - Parameters - ---------- - file_path : Path - The file path to analyze. - - Returns - ------- - component_config : Dict[str, str] - A dictionary containing inferred component configuration. - """ - file_ext = file_path.suffix.lower() - component_config = {} - - # Infer component config - if file_ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: - # Check for CSVNetworkFormat keywords - if "edgelist" in file_path.stem.lower(): - component_config["component_type"] = r.ComponentType.PLOT.value - component_config["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - component_config ["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value - elif "adjlist" in file_path.stem.lower(): - component_config ["component_type"] = r.ComponentType.PLOT.value - component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - component_config ["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value - # Fill the config with dataframe content - else: - component_config ["component_type"] = r.ComponentType.DATAFRAME.value - component_config ["file_format"] = r.DataFrameFormat.CSV.value if file_ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value - component_config ["delimiter"] = "," if file_ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" - # Check other DataframeFormats than csv and txt - elif file_ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: - component_config ["component_type"] = r.ComponentType.DATAFRAME.value - component_config ["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == file_ext) - # Check for network formats - elif file_ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: - component_config ["component_type"] = r.ComponentType.PLOT.value - if file_ext in [ - r.NetworkFormat.PNG.value_with_dot, - r.NetworkFormat.JPG.value_with_dot, - r.NetworkFormat.JPEG.value_with_dot, - r.NetworkFormat.SVG.value_with_dot, - ]: - component_config ["plot_type"] = r.PlotType.STATIC.value - else: - component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value - # Check for interactive plots - elif file_ext == ".json": - component_config ["component_type"] = r.ComponentType.PLOT.value - if "plotly" in file_path.stem.lower(): - component_config ["plot_type"] = r.PlotType.PLOTLY.value - elif "altair" in file_path.stem.lower(): - component_config ["plot_type"] = r.PlotType.ALTAIR.value - else: - component_config ["plot_type"] = "unknown" - elif file_ext == ".md": - component_config ["component_type"] = r.ComponentType.MARKDOWN.value - else: - error_msg = ( - f"Unsupported file extension: {file_ext}. " - f"Supported extensions include:\n" - f" - Network formats: {', '.join(fmt.value_with_dot for fmt in r.NetworkFormat)}\n" - f" - DataFrame formats: {', '.join(fmt.value_with_dot for fmt in r.DataFrameFormat)}" - ) - #self.logger.error(error_msg) - raise ValueError(error_msg) - - return component_config - -def _sort_paths_by_numprefix(paths: List[Path]) -> List[Path]: - """ - Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end. - - Parameters - ---------- - paths : List[Path] - The list of Path objects to sort. - - Returns - ------- - List[Path] - The sorted list of Path objects. - """ - def get_sort_key(path: Path) -> tuple: - parts = path.name.split("_", 1) - if parts[0].isdigit(): - numeric_prefix = int(parts[0]) - else: - # Non-numeric prefixes go to the end - numeric_prefix = float('inf') - return numeric_prefix, path.name.lower() - - return sorted(paths, key=get_sort_key) - -def _create_subsect_config_fromdir(subsection_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: - """ - Creates subsection config from a directory. - - Parameters - ---------- - subsection_dir_path : Path - Path to the subsection directory. - - Returns - ------- - Dict[str, Union[str, List[Dict]]] - The subsection config. - """ - subsection_config = { - "title": _create_title_fromdir(subsection_dir_path.name), - "description": "", - "components": [], - } - - # Sort files by number prefix - sorted_files = _sort_paths_by_numprefix(list(subsection_dir_path.iterdir())) - - for file in sorted_files: - if file.is_file(): - component_config = _create_component_config_fromfile(file) - - # Ensure the file path is absolute - file_path = file.resolve() - - component_config_updt = { - "title": _create_title_fromdir(file.name), - "file_path": str(file_path), - "description": "", - } - - # Update inferred config information - component_config.update(component_config_updt) - - subsection_config["components"].append(component_config) - - return subsection_config - -def _create_sect_config_fromdir(section_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: - """ - Creates section config from a directory. - - Parameters - ---------- - section_dir_path : Path - Path to the section directory. - - Returns - ------- - Dict[str, Union[str, List[Dict]]] - The section config. - """ - section_config = { - "title": _create_title_fromdir(section_dir_path.name), - "description": "", - "subsections": [], - } - - # Sort subsections by number prefix - sorted_subsections = _sort_paths_by_numprefix(list(section_dir_path.iterdir())) - - for subsection_dir in sorted_subsections: - if subsection_dir.is_dir(): - section_config["subsections"].append(_create_subsect_config_fromdir(subsection_dir)) - - return section_config - - -def _resolve_base_dir(base_dir: str) -> Path: - """ - Resolves the provided base directory to an absolute path from the root, accounting for relative paths. - - Parameters - ---------- - base_dir : str - The relative or absolute path to the base directory. - - Returns - ------- - Path - The absolute path to the base directory. - """ - # Check if we are in a subdirectory and need to go up one level - project_dir = Path(__file__).resolve().parents[1] - - # If the base_dir is a relative path, resolve it from the project root - base_dir_path = project_dir / base_dir - - # Make sure the resolved base directory exists - if not base_dir_path.is_dir(): - raise ValueError(f"Base directory '{base_dir}' does not exist or is not a directory.") - - return base_dir_path - - -def create_yamlconfig_fromdir(base_dir: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: - """ - Generates a YAML-compatible config file from a directory. It also returns the resolved folder path. - - Parameters - ---------- - base_dir : str - The base directory containing section and subsection folders. - - Returns - ------- - Tuple[Dict[str, Union[str, List[Dict]]], Path] - The YAML config and the resolved directory path. - """ - # Get absolute path from base directory - base_dir_path = _resolve_base_dir(base_dir) - - # Generate the YAML config - yaml_config = { - "report": { - "title": _create_title_fromdir(base_dir_path.name), - "description": "", - "graphical_abstract": "", - "logo": "", - }, - "sections": [], - } - - # Sort sections by their number prefix - sorted_sections = _sort_paths_by_numprefix(list(base_dir_path.iterdir())) - - # Generate sections and subsections config - for section_dir in sorted_sections: - if section_dir.is_dir(): - yaml_config["sections"].append(_create_sect_config_fromdir(section_dir)) - - return yaml_config, base_dir_path \ No newline at end of file diff --git a/vuegen/config_manager.py b/vuegen/config_manager.py index bb09b06..b72bf96 100644 --- a/vuegen/config_manager.py +++ b/vuegen/config_manager.py @@ -19,6 +19,261 @@ def __init__(self, logger=None): """ self.logger = logger or get_logger("report") + def _create_title_fromdir(self, file_dirname: str) -> str: + """ + Infers title from a file or directory, removing leading numeric prefixes. + + Parameters + ---------- + file_dirname : str + The file or directory name to infer the title from. + + Returns + ------- + str + A title generated from the file or directory name. + """ + # Remove leading numbers and underscores if they exist + name = os.path.splitext(file_dirname)[0] + parts = name.split("_", 1) + title = parts[1] if parts[0].isdigit() and len(parts) > 1 else name + return title.replace("_", " ").title() + + def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]: + """ + Infers a component config from a file, including component type, plot type, and additional fields. + + Parameters + ---------- + file_path : Path + The file path to analyze. + + Returns + ------- + component_config : Dict[str, str] + A dictionary containing inferred component configuration. + """ + file_ext = file_path.suffix.lower() + component_config = {} + + # Infer component config + if file_ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: + # Check for CSVNetworkFormat keywords + if "edgelist" in file_path.stem.lower(): + component_config["component_type"] = r.ComponentType.PLOT.value + component_config["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value + elif "adjlist" in file_path.stem.lower(): + component_config ["component_type"] = r.ComponentType.PLOT.value + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value + # Fill the config with dataframe content + else: + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = r.DataFrameFormat.CSV.value if file_ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value + component_config ["delimiter"] = "," if file_ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" + # Check other DataframeFormats than csv and txt + elif file_ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == file_ext) + # Check for network formats + elif file_ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: + component_config ["component_type"] = r.ComponentType.PLOT.value + if file_ext in [ + r.NetworkFormat.PNG.value_with_dot, + r.NetworkFormat.JPG.value_with_dot, + r.NetworkFormat.JPEG.value_with_dot, + r.NetworkFormat.SVG.value_with_dot, + ]: + component_config ["plot_type"] = r.PlotType.STATIC.value + else: + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + # Check for interactive plots + elif file_ext == ".json": + component_config ["component_type"] = r.ComponentType.PLOT.value + if "plotly" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.PLOTLY.value + elif "altair" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.ALTAIR.value + else: + component_config ["plot_type"] = "unknown" + elif file_ext == ".md": + component_config ["component_type"] = r.ComponentType.MARKDOWN.value + else: + error_msg = ( + f"Unsupported file extension: {file_ext}. " + f"Supported extensions include:\n" + f" - Network formats: {', '.join(fmt.value_with_dot for fmt in r.NetworkFormat)}\n" + f" - DataFrame formats: {', '.join(fmt.value_with_dot for fmt in r.DataFrameFormat)}" + ) + #self.logger.error(error_msg) + raise ValueError(error_msg) + + return component_config + + def _sort_paths_by_numprefix(self, paths: List[Path]) -> List[Path]: + """ + Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end. + + Parameters + ---------- + paths : List[Path] + The list of Path objects to sort. + + Returns + ------- + List[Path] + The sorted list of Path objects. + """ + def get_sort_key(path: Path) -> tuple: + parts = path.name.split("_", 1) + if parts[0].isdigit(): + numeric_prefix = int(parts[0]) + else: + # Non-numeric prefixes go to the end + numeric_prefix = float('inf') + return numeric_prefix, path.name.lower() + + return sorted(paths, key=get_sort_key) + + def _create_subsect_config_fromdir(self, subsection_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Creates subsection config from a directory. + + Parameters + ---------- + subsection_dir_path : Path + Path to the subsection directory. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The subsection config. + """ + subsection_config = { + "title": self._create_title_fromdir(subsection_dir_path.name), + "description": "", + "components": [], + } + + # Sort files by number prefix + sorted_files = self._sort_paths_by_numprefix(list(subsection_dir_path.iterdir())) + + for file in sorted_files: + if file.is_file(): + component_config = self._create_component_config_fromfile(file) + + # Ensure the file path is absolute + file_path = file.resolve() + + component_config_updt = { + "title": self._create_title_fromdir(file.name), + "file_path": str(file_path), + "description": "", + } + + # Update inferred config information + component_config.update(component_config_updt) + + subsection_config["components"].append(component_config) + + return subsection_config + + def _create_sect_config_fromdir(self, section_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Creates section config from a directory. + + Parameters + ---------- + section_dir_path : Path + Path to the section directory. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The section config. + """ + section_config = { + "title": self._create_title_fromdir(section_dir_path.name), + "description": "", + "subsections": [], + } + + # Sort subsections by number prefix + sorted_subsections = self._sort_paths_by_numprefix(list(section_dir_path.iterdir())) + + for subsection_dir in sorted_subsections: + if subsection_dir.is_dir(): + section_config["subsections"].append(self._create_subsect_config_fromdir(subsection_dir)) + + return section_config + + + def _resolve_base_dir(self, base_dir: str) -> Path: + """ + Resolves the provided base directory to an absolute path from the root, accounting for relative paths. + + Parameters + ---------- + base_dir : str + The relative or absolute path to the base directory. + + Returns + ------- + Path + The absolute path to the base directory. + """ + # Check if we are in a subdirectory and need to go up one level + project_dir = Path(__file__).resolve().parents[1] + + # If the base_dir is a relative path, resolve it from the project root + base_dir_path = project_dir / base_dir + + # Make sure the resolved base directory exists + if not base_dir_path.is_dir(): + raise ValueError(f"Base directory '{base_dir}' does not exist or is not a directory.") + + return base_dir_path + + + def create_yamlconfig_fromdir(self, base_dir: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: + """ + Generates a YAML-compatible config file from a directory. It also returns the resolved folder path. + + Parameters + ---------- + base_dir : str + The base directory containing section and subsection folders. + + Returns + ------- + Tuple[Dict[str, Union[str, List[Dict]]], Path] + The YAML config and the resolved directory path. + """ + # Get absolute path from base directory + base_dir_path = self._resolve_base_dir(base_dir) + + # Generate the YAML config + yaml_config = { + "report": { + "title": self._create_title_fromdir(base_dir_path.name), + "description": "", + "graphical_abstract": "", + "logo": "", + }, + "sections": [], + } + + # Sort sections by their number prefix + sorted_sections = self._sort_paths_by_numprefix(list(base_dir_path.iterdir())) + + # Generate sections and subsections config + for section_dir in sorted_sections: + if section_dir.is_dir(): + yaml_config["sections"].append(self._create_sect_config_fromdir(section_dir)) + + return yaml_config, base_dir_path + def initialize_report(self, config: dict) -> tuple[r.Report, dict]: """ Extracts report metadata from a YAML config file and returns a Report object and the raw metadata. diff --git a/vuegen/main.py b/vuegen/main.py index b163954..6785c38 100644 --- a/vuegen/main.py +++ b/vuegen/main.py @@ -1,22 +1,33 @@ import report_generator -from utils import get_logger, load_yaml_config, get_args +from pathlib import Path +from utils import get_logger, get_args if __name__ == '__main__': # Parse command-line arguments args = get_args(prog_name="VueGen") + + # Determine the configuration file path or directory config_path = args.config + dir_path = args.directory + + # Report type report_type = args.report_type - - # Load the YAML configuration file with the report metadata - report_config = load_yaml_config(config_path) + + # Determine the report name for logger suffix + if config_path: + report_name = Path(config_path).stem + else: + report_name = Path(dir_path).name # Define logger suffix based on report type and name - report_title = report_config['report'].get('title') - logger_suffix = f"{report_type}_report_{report_title}" + logger_suffix = f"{report_type}_report_{report_name}" # Initialize logger logger = get_logger(f"{logger_suffix}") # Generate the report - report_generator.get_report(config = report_config, report_type = report_type, logger = logger) + report_generator.get_report(config_path = config_path, + dir_path = dir_path, + report_type = report_type, + logger = logger) diff --git a/vuegen/report_generator.py b/vuegen/report_generator.py index 80c11c9..acbf9c3 100644 --- a/vuegen/report_generator.py +++ b/vuegen/report_generator.py @@ -1,31 +1,43 @@ from streamlit_reportview import StreamlitReportView from quarto_reportview import QuartoReportView from config_manager import ConfigManager -from utils import assert_enum_value +from utils import assert_enum_value, load_yaml_config, write_yaml_config from report import ReportType import logging -def get_report(config: dict, report_type: str, logger: logging.Logger) -> None: +def get_report(report_type: str, logger: logging.Logger, config_path: str = None, dir_path: str = None) -> None: """ Generate and run a report based on the specified engine. Parameters ---------- - config : dict - The report metadata obtained from a YAML config file. report_type : str The report type. It should be one of the values of the ReportType Enum. logger : logging.Logger A logger object to track warnings, errors, and info messages. + config : str, optional + Path to the YAML configuration file. + dir_path : str, optional + Path to the directory from which to generate the configuration file. Raises ------ ValueError - If an unsupported report engine, report type, or report format are provided. + If neither 'config_path' nor 'directory' is provided. """ - # Load report object and metadata from the YAML config file - yaml_manager = ConfigManager(logger) - report, report_metadata = yaml_manager.initialize_report(config) + # Initialize the config manager object + config_manager = ConfigManager(logger) + + if dir_path: + # Generate configuration from the provided directory + yaml_data, base_folder_path = config_manager.create_yamlconfig_fromdir(dir_path) + config_path = write_yaml_config(yaml_data, base_folder_path) + + # Load the YAML configuration file with the report metadata + report_config = load_yaml_config(config_path) + + # Load report object and metadata + report, report_metadata = config_manager.initialize_report(report_config) # Validate and convert the report type to its enum value report_type = assert_enum_value(ReportType, report_type, logger) diff --git a/vuegen/utils.py b/vuegen/utils.py index 802547f..6b9cc6e 100644 --- a/vuegen/utils.py +++ b/vuegen/utils.py @@ -169,16 +169,23 @@ def get_args(prog_name: str, others: dict = {}) -> argparse.Namespace: parser.add_argument( "-c", "--config", + type = str, + default = None, + help = "Path to the YAML configuration file." + ) + parser.add_argument( + "-dir", + "--directory", type=str, - default="report_config_micw2graph.yaml", - help="Path to the YAML configuration file." + default=None, + help="Path to the directory from which the YAML config will be inferred." ) parser.add_argument( "-rt", "--report_type", - type=str, - default=None, - help="Type of the report to generate (streamlit, html, pdf, docx, odt, revealjs, pptx, or jupyter)." + type = str, + default = None, + help = "Type of the report to generate (streamlit, html, pdf, docx, odt, revealjs, pptx, or jupyter)." ) # Parse arguments @@ -420,7 +427,7 @@ def load_yaml_config(file_path: str) -> dict: return config -def write_yaml_config(yaml_data: dict, directory_path: Path) -> None: +def write_yaml_config(yaml_data: dict, directory_path: Path) -> Path: """ Writes the generated YAML structure to a file. @@ -433,9 +440,11 @@ def write_yaml_config(yaml_data: dict, directory_path: Path) -> None: Returns ------- - None + output_yaml : Path + The path to the written YAML file. """ assert isinstance(yaml_data, dict), "YAML data must be a dictionary." + assert isinstance(directory_path, Path), "directory_path must be a Path object." # Generate the output YAML file path based on the folder name output_yaml = directory_path / (directory_path.name + "_config.yaml") @@ -448,6 +457,9 @@ def write_yaml_config(yaml_data: dict, directory_path: Path) -> None: with open(output_yaml, "w") as yaml_file: yaml.dump(yaml_data, yaml_file, default_flow_style=False, sort_keys=False) + # Return the path to the written file + return output_yaml + ## LOGGING def get_basename(fname: None | str = None) -> str: """