diff --git a/.gitignore b/.gitignore index 6f52628..cc38bf7 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,4 @@ quarto_report/ UML_diagrams/ Graphical_abstract/ docs/presentations/ -test.py -vuegen/yaml_generator.py -MicW2Graph_config.yaml \ No newline at end of file +test.py \ No newline at end of file diff --git a/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv b/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv similarity index 100% rename from example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example.csv rename to example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv diff --git a/example_data/MicW2Graph/MicW2Graph_config.yaml b/example_data/MicW2Graph/MicW2Graph_config.yaml new file mode 100644 index 0000000..cd2cc51 --- /dev/null +++ b/example_data/MicW2Graph/MicW2Graph_config.yaml @@ -0,0 +1,105 @@ +report: + title: Micw2Graph + description: '' + graphical_abstract: '' + logo: '' +sections: +- title: Exploratory Data Analysis + description: '' + subsections: + - title: Abundance Data + description: '' + components: + - component_type: plot + plot_type: plotly + title: Top Species Plot Biome Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/1_top_species_plot_biome_plotly.json + description: '' + - component_type: plot + plot_type: altair + title: Multilineplot Altair + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/2_multilineplot_altair.json + description: '' + - component_type: dataframe + file_format: csv + delimiter: ',' + title: Abundance Data Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/3_abundance_data_allbiomes.csv + description: '' + - component_type: dataframe + file_format: xls + title: Abundance Data Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/1_Abundance_data/4_abundance_data_allbiomes.xls + description: '' + - title: Sample Data + description: '' + components: + - component_type: plot + plot_type: plotly + title: Pie Plot Countries Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/1_pie_plot_countries_plotly.json + description: '' + - component_type: plot + plot_type: plotly + title: Pie Plots Biomes Plotly + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/2_pie_plots_biomes_plotly.json + description: '' + - component_type: plot + plot_type: static + title: Number Samples Per Study + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/3_number_samples_per_study.png + description: '' + - component_type: dataframe + file_format: parquet + title: Sample Info Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/4_sample_info_allbiomes.parquet + description: '' + - component_type: dataframe + file_format: txt + delimiter: \t + title: Sample Info Allbiomes + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/2_Sample_data/5_sample_info_allbiomes.txt + description: '' + - title: Extra Info + description: '' + components: + - component_type: markdown + title: Test Md + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/1_Exploratory_Data_Analysis/3_Extra_info/1_test_md.md + description: '' +- title: Microbial Association Networks + description: '' + subsections: + - title: Network Visualization1 + description: '' + components: + - component_type: plot + plot_type: interactive_network + title: Man Example + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/1_Network_visualization1/1_man_example.graphml + description: '' + - title: Network Visualization2 + description: '' + components: + - component_type: plot + plot_type: interactive_network + csv_network_format: edgelist + title: Man Example Edgelist + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/2_Network_visualization2/1_man_example_edgelist.csv + description: '' + - title: Network Visualization3 + description: '' + components: + - component_type: plot + plot_type: interactive_network + title: Man Example + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/3_Network_visualization3/1_man_example.cyjs + description: '' + - title: Network Visualization4 + description: '' + components: + - component_type: plot + plot_type: interactive_network + title: Ckg Network + file_path: /Users/asaru/Documents/DTU/MoNA/vuegen/example_data/MicW2Graph/2_Microbial_Association_Networks/4_Network_visualization4/1_ckg_network.html + description: '' diff --git a/vuegen/config_manager.py b/vuegen/config_manager.py index 4ad12ac..be2b510 100644 --- a/vuegen/config_manager.py +++ b/vuegen/config_manager.py @@ -1,3 +1,7 @@ +import os +from pathlib import Path +from typing import Dict, List, Union, Tuple + from . import report as r from .utils import assert_enum_value, get_logger @@ -17,6 +21,261 @@ def __init__(self, logger=None): """ self.logger = logger or get_logger("report") + def _create_title_fromdir(self, file_dirname: str) -> str: + """ + Infers title from a file or directory, removing leading numeric prefixes. + + Parameters + ---------- + file_dirname : str + The file or directory name to infer the title from. + + Returns + ------- + str + A title generated from the file or directory name. + """ + # Remove leading numbers and underscores if they exist + name = os.path.splitext(file_dirname)[0] + parts = name.split("_", 1) + title = parts[1] if parts[0].isdigit() and len(parts) > 1 else name + return title.replace("_", " ").title() + + def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]: + """ + Infers a component config from a file, including component type, plot type, and additional fields. + + Parameters + ---------- + file_path : Path + The file path to analyze. + + Returns + ------- + component_config : Dict[str, str] + A dictionary containing inferred component configuration. + """ + file_ext = file_path.suffix.lower() + component_config = {} + + # Infer component config + if file_ext in [r.DataFrameFormat.CSV.value_with_dot, r.DataFrameFormat.TXT.value_with_dot]: + # Check for CSVNetworkFormat keywords + if "edgelist" in file_path.stem.lower(): + component_config["component_type"] = r.ComponentType.PLOT.value + component_config["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.EDGELIST.value + elif "adjlist" in file_path.stem.lower(): + component_config ["component_type"] = r.ComponentType.PLOT.value + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + component_config ["csv_network_format"] = r.CSVNetworkFormat.ADJLIST.value + # Fill the config with dataframe content + else: + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = r.DataFrameFormat.CSV.value if file_ext == r.DataFrameFormat.CSV.value_with_dot else r.DataFrameFormat.TXT.value + component_config ["delimiter"] = "," if file_ext == r.DataFrameFormat.CSV.value_with_dot else "\\t" + # Check other DataframeFormats than csv and txt + elif file_ext in [fmt.value_with_dot for fmt in r.DataFrameFormat if fmt not in [r.DataFrameFormat.CSV, r.DataFrameFormat.TXT]]: + component_config ["component_type"] = r.ComponentType.DATAFRAME.value + component_config ["file_format"] = next(fmt.value for fmt in r.DataFrameFormat if fmt.value_with_dot == file_ext) + # Check for network formats + elif file_ext in [fmt.value_with_dot for fmt in r.NetworkFormat]: + component_config ["component_type"] = r.ComponentType.PLOT.value + if file_ext in [ + r.NetworkFormat.PNG.value_with_dot, + r.NetworkFormat.JPG.value_with_dot, + r.NetworkFormat.JPEG.value_with_dot, + r.NetworkFormat.SVG.value_with_dot, + ]: + component_config ["plot_type"] = r.PlotType.STATIC.value + else: + component_config ["plot_type"] = r.PlotType.INTERACTIVE_NETWORK.value + # Check for interactive plots + elif file_ext == ".json": + component_config ["component_type"] = r.ComponentType.PLOT.value + if "plotly" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.PLOTLY.value + elif "altair" in file_path.stem.lower(): + component_config ["plot_type"] = r.PlotType.ALTAIR.value + else: + component_config ["plot_type"] = "unknown" + elif file_ext == ".md": + component_config ["component_type"] = r.ComponentType.MARKDOWN.value + else: + error_msg = ( + f"Unsupported file extension: {file_ext}. " + f"Supported extensions include:\n" + f" - Network formats: {', '.join(fmt.value_with_dot for fmt in r.NetworkFormat)}\n" + f" - DataFrame formats: {', '.join(fmt.value_with_dot for fmt in r.DataFrameFormat)}" + ) + #self.logger.error(error_msg) + raise ValueError(error_msg) + + return component_config + + def _sort_paths_by_numprefix(self, paths: List[Path]) -> List[Path]: + """ + Sorts a list of Paths by numeric prefixes in their names, placing non-numeric items at the end. + + Parameters + ---------- + paths : List[Path] + The list of Path objects to sort. + + Returns + ------- + List[Path] + The sorted list of Path objects. + """ + def get_sort_key(path: Path) -> tuple: + parts = path.name.split("_", 1) + if parts[0].isdigit(): + numeric_prefix = int(parts[0]) + else: + # Non-numeric prefixes go to the end + numeric_prefix = float('inf') + return numeric_prefix, path.name.lower() + + return sorted(paths, key=get_sort_key) + + def _create_subsect_config_fromdir(self, subsection_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Creates subsection config from a directory. + + Parameters + ---------- + subsection_dir_path : Path + Path to the subsection directory. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The subsection config. + """ + subsection_config = { + "title": self._create_title_fromdir(subsection_dir_path.name), + "description": "", + "components": [], + } + + # Sort files by number prefix + sorted_files = self._sort_paths_by_numprefix(list(subsection_dir_path.iterdir())) + + for file in sorted_files: + if file.is_file(): + component_config = self._create_component_config_fromfile(file) + + # Ensure the file path is absolute + file_path = file.resolve() + + component_config_updt = { + "title": self._create_title_fromdir(file.name), + "file_path": str(file_path), + "description": "", + } + + # Update inferred config information + component_config.update(component_config_updt) + + subsection_config["components"].append(component_config) + + return subsection_config + + def _create_sect_config_fromdir(self, section_dir_path: Path) -> Dict[str, Union[str, List[Dict]]]: + """ + Creates section config from a directory. + + Parameters + ---------- + section_dir_path : Path + Path to the section directory. + + Returns + ------- + Dict[str, Union[str, List[Dict]]] + The section config. + """ + section_config = { + "title": self._create_title_fromdir(section_dir_path.name), + "description": "", + "subsections": [], + } + + # Sort subsections by number prefix + sorted_subsections = self._sort_paths_by_numprefix(list(section_dir_path.iterdir())) + + for subsection_dir in sorted_subsections: + if subsection_dir.is_dir(): + section_config["subsections"].append(self._create_subsect_config_fromdir(subsection_dir)) + + return section_config + + + def _resolve_base_dir(self, base_dir: str) -> Path: + """ + Resolves the provided base directory to an absolute path from the root, accounting for relative paths. + + Parameters + ---------- + base_dir : str + The relative or absolute path to the base directory. + + Returns + ------- + Path + The absolute path to the base directory. + """ + # Check if we are in a subdirectory and need to go up one level + project_dir = Path(__file__).resolve().parents[1] + + # If the base_dir is a relative path, resolve it from the project root + base_dir_path = project_dir / base_dir + + # Make sure the resolved base directory exists + if not base_dir_path.is_dir(): + raise ValueError(f"Base directory '{base_dir}' does not exist or is not a directory.") + + return base_dir_path + + + def create_yamlconfig_fromdir(self, base_dir: str) -> Tuple[Dict[str, Union[str, List[Dict]]], Path]: + """ + Generates a YAML-compatible config file from a directory. It also returns the resolved folder path. + + Parameters + ---------- + base_dir : str + The base directory containing section and subsection folders. + + Returns + ------- + Tuple[Dict[str, Union[str, List[Dict]]], Path] + The YAML config and the resolved directory path. + """ + # Get absolute path from base directory + base_dir_path = self._resolve_base_dir(base_dir) + + # Generate the YAML config + yaml_config = { + "report": { + "title": self._create_title_fromdir(base_dir_path.name), + "description": "", + "graphical_abstract": "", + "logo": "", + }, + "sections": [], + } + + # Sort sections by their number prefix + sorted_sections = self._sort_paths_by_numprefix(list(base_dir_path.iterdir())) + + # Generate sections and subsections config + for section_dir in sorted_sections: + if section_dir.is_dir(): + yaml_config["sections"].append(self._create_sect_config_fromdir(section_dir)) + + return yaml_config, base_dir_path + def initialize_report(self, config: dict) -> tuple[r.Report, dict]: """ Extracts report metadata from a YAML config file and returns a Report object and the raw metadata. diff --git a/vuegen/main.py b/vuegen/main.py index 9d1af50..1890a10 100644 --- a/vuegen/main.py +++ b/vuegen/main.py @@ -1,22 +1,34 @@ +from pathlib import Path + from vuegen import report_generator -from vuegen.utils import get_args, get_logger, load_yaml_config +from vuegen.utils import get_args, get_logger if __name__ == '__main__': # Parse command-line arguments args = get_args(prog_name="VueGen") + + # Determine the configuration file path or directory config_path = args.config + dir_path = args.directory + + # Report type report_type = args.report_type - - # Load the YAML configuration file with the report metadata - report_config = load_yaml_config(config_path) + + # Determine the report name for logger suffix + if config_path: + report_name = Path(config_path).stem + else: + report_name = Path(dir_path).name # Define logger suffix based on report type and name - report_title = report_config['report'].get('title') - logger_suffix = f"{report_type}_report_{report_title}" + logger_suffix = f"{report_type}_report_{report_name}" # Initialize logger logger = get_logger(f"{logger_suffix}") # Generate the report - report_generator.get_report(config = report_config, report_type = report_type, logger = logger) + report_generator.get_report(config_path = config_path, + dir_path = dir_path, + report_type = report_type, + logger = logger) diff --git a/vuegen/quarto_reportview.py b/vuegen/quarto_reportview.py index 3cf829a..f23b188 100644 --- a/vuegen/quarto_reportview.py +++ b/vuegen/quarto_reportview.py @@ -338,7 +338,7 @@ def _generate_plot_code(self, plot, output_file = "") -> str: """ # Initialize plot code with common structure plot_code = f"""```{{python}} -#| label: '{plot.title}' +#| label: '{plot.title} {plot.id}' #| fig-cap: "" """ # If the file path is a URL, generate code to fetch content via requests @@ -394,7 +394,7 @@ def _generate_dataframe_content(self, dataframe, is_report_static) -> List[str]: # Append header for DataFrame loading dataframe_content.append(f"""```{{python}} -#| label: '{dataframe.title}' +#| label: '{dataframe.title} {dataframe.id}' #| fig-cap: "" """) # Mapping of file extensions to read functions @@ -402,7 +402,8 @@ def _generate_dataframe_content(self, dataframe, is_report_static) -> List[str]: r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, r.DataFrameFormat.TXT.value_with_dot: pd.read_table, - r.DataFrameFormat.XLS.value_with_dot: pd.read_excel + r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, + r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel } try: # Check if the file extension matches any DataFrameFormat value @@ -452,7 +453,7 @@ def _generate_markdown_content(self, markdown) -> List[str]: # Initialize md code with common structure markdown_content.append(f""" ```{{python}} -#| label: '{markdown.title}' +#| label: '{markdown.title} {markdown.id}' #| fig-cap: ""\n""") # If the file path is a URL, generate code to fetch content via requests if is_url(markdown.file_path): @@ -481,7 +482,7 @@ def _generate_markdown_content(self, markdown) -> List[str]: def _generate_image_content(self, image_path: str, alt_text: str = "", width: int = 650, height: int = 400) -> str: """ - Adds an image to the content list in a centered format with a specified width. + Adds an image to the content list in an HTML format with a specified width and height. Parameters ---------- @@ -490,20 +491,25 @@ def _generate_image_content(self, image_path: str, alt_text: str = "", width: in width : int, optional Width of the image in pixels (default is 650). height : int, optional - Height of the image in pixels (default is 500). + Height of the image in pixels (default is 400). alt_text : str, optional Alternative text for the image (default is an empty string). Returns ------- str - The formatted image content. + The formatted HTML image content. """ - # Check if the image path is a URL or a local file path if is_url(image_path): - return f"""{{ width={width}px height={height}px fig-align="center"}}\n""" + src = image_path else: - return f"""}){{ width={width}px height={height}px fig-align="center"}}\n""" + src = os.path.abspath(image_path) + + # Return the HTML content + return f""" +