diff --git a/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/2_abundance_table_example_xls.xls b/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/2_abundance_table_example_xls.xls index fbadf4c..c96a583 100644 Binary files a/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/2_abundance_table_example_xls.xls and b/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/2_abundance_table_example_xls.xls differ diff --git a/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/5_example_xlsx.xlsx b/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/5_example_xlsx.xlsx new file mode 100644 index 0000000..495c653 Binary files /dev/null and b/docs/example_data/Basic_example_vuegen_demo_notebook/2_Dataframes/1_All_formats/5_example_xlsx.xlsx differ diff --git a/src/vuegen/config_manager.py b/src/vuegen/config_manager.py index 26180ba..d139544 100644 --- a/src/vuegen/config_manager.py +++ b/src/vuegen/config_manager.py @@ -74,7 +74,7 @@ def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]: file_path.resolve().as_posix() ) # ! needs to be posix for all OS support component_config["description"] = "" - component_config["caption"] = "" + component_config["caption"] = "" # ? It is not populated here # Infer component config if file_ext in [ diff --git a/src/vuegen/quarto_reportview.py b/src/vuegen/quarto_reportview.py index 545da41..19d508c 100644 --- a/src/vuegen/quarto_reportview.py +++ b/src/vuegen/quarto_reportview.py @@ -1,13 +1,14 @@ import os import subprocess import sys +import textwrap from pathlib import Path -from typing import List +from typing import List, Optional import networkx as nx -import pandas as pd from . import report as r +from . import table_utils from .utils import create_folder, get_relative_file_path, is_url, sort_imports @@ -271,10 +272,6 @@ def run_report(self, output_dir: str = BASE_DIR) -> None: [self.quarto_path, "install", "tinytex", "--no-prompt"], check=True, ) - subprocess.run( - [self.quarto_path, "install", "chromium", "--no-prompt"], - check=True, - ) try: subprocess.run( args, @@ -712,19 +709,16 @@ def _generate_dataframe_content(self, dataframe) -> List[str]: # Append header for DataFrame loading dataframe_content.append( - f"""```{{python}} -#| label: '{dataframe.title} {dataframe.id}' -#| fig-cap: "" -""" + textwrap.dedent( + f"""\ + ```{{python}} + #| label: '{dataframe.title} {dataframe.id}' + #| fig-cap: "" + """ + ) ) # Mapping of file extensions to read functions - read_function_mapping = { - r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, - r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, - r.DataFrameFormat.TXT.value_with_dot: pd.read_table, - r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, - r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel, - } + read_function_mapping = table_utils.read_function_mapping try: # Check if the file extension matches any DataFrameFormat value file_extension = Path(dataframe.file_path).suffix.lower() @@ -740,24 +734,68 @@ def _generate_dataframe_content(self, dataframe) -> List[str]: df_file_path = dataframe.file_path else: df_file_path = get_relative_file_path( - dataframe.file_path, base_path=".." + dataframe.file_path, ) + sheet_names = None + # If the file is an Excel file, get the sheet names + if file_extension in [ + r.DataFrameFormat.XLS.value_with_dot, + r.DataFrameFormat.XLSX.value_with_dot, + ]: + sheet_names = table_utils.get_sheet_names(df_file_path) + if len(sheet_names) > 1: + # If there are multiple sheets, use the first one + self.report.logger.info( + f"Multiple sheets found in the Excel file: {df_file_path}. " + f"Sheets: {sheet_names}" + ) + else: + sheet_names = None + # Build the file path (URL or local file) + if is_url(dataframe.file_path): + df_file_path = dataframe.file_path + else: + df_file_path = get_relative_file_path( + dataframe.file_path, base_path=".." + ) # Load the DataFrame using the correct function read_function = read_function_mapping[file_extension] dataframe_content.append( f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n""" ) - # Display the dataframe dataframe_content.extend(self._show_dataframe(dataframe)) + # Add further sheets + if sheet_names: + for sheet_name in sheet_names[1:]: + dataframe_content.append(f"#### {sheet_name}") + dataframe_content.append( + textwrap.dedent( + f"""\ + ```{{python}} + #| label: '{dataframe.title} {dataframe.id} {sheet_name}' + #| fig-cap: "" + """ + ) + ) + dataframe_content.append( + f"df = pd.{read_function.__name__}('{df_file_path.as_posix()}', " + f"sheet_name='{sheet_name}')\n" + ) + # Display the dataframe + dataframe_content.extend( + self._show_dataframe(dataframe, suffix=sheet_name) + ) + except Exception as e: self.report.logger.error( f"Error generating content for DataFrame: {dataframe.title}. Error: {str(e)}" ) raise # Add caption if available + # ? Where should this come from? if dataframe.caption: dataframe_content.append(f">{dataframe.caption}\n") @@ -787,18 +825,24 @@ def _generate_markdown_content(self, markdown) -> List[str]: try: # Initialize md code with common structure markdown_content.append( - f""" -```{{python}} -#| label: '{markdown.title} {markdown.id}' -#| fig-cap: ""\n""" + textwrap.dedent( + f""" + ```{{python}} + #| label: '{markdown.title} {markdown.id}' + #| fig-cap: "" + """ + ) ) # If the file path is a URL, generate code to fetch content via requests if is_url(markdown.file_path): markdown_content.append( - f""" -response = requests.get('{markdown.file_path}') -response.raise_for_status() -markdown_content = response.text\n""" + textwrap.dedent( + f"""\ + response = requests.get('{markdown.file_path}') + response.raise_for_status() + markdown_content = response.text + """ + ) ) else: # If it's a local file md_rel_path = get_relative_file_path(markdown.file_path, base_path="..") @@ -826,7 +870,7 @@ def _generate_markdown_content(self, markdown) -> List[str]: ) return markdown_content - def _show_dataframe(self, dataframe) -> List[str]: + def _show_dataframe(self, dataframe, suffix: Optional[str] = None) -> List[str]: """ Appends either a static image or an interactive representation of a DataFrame to the content list. @@ -834,6 +878,9 @@ def _show_dataframe(self, dataframe) -> List[str]: ---------- dataframe : DataFrame The DataFrame object containing the data to display. + suffix : str, optional + A suffix to append to the DataFrame image file name like a sheet name + or another identifier (default is None). Returns ------- @@ -843,14 +890,19 @@ def _show_dataframe(self, dataframe) -> List[str]: dataframe_content = [] if self.is_report_static: # Generate path for the DataFrame image - df_image = ( - Path(self.static_dir) / f"{dataframe.title.replace(' ', '_')}.png" - ) + fpath_df_image = Path(self.static_dir) / dataframe.title.replace(" ", "_") + if suffix: + fpath_df_image = fpath_df_image.with_stem( + fpath_df_image.stem + f"_{suffix.replace(' ', '_')}" + ) + fpath_df_image = fpath_df_image.with_suffix(".png") + dataframe_content.append( - f"df.dfi.export('{Path(df_image).relative_to('quarto_report').as_posix()}', max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n" + f"df.dfi.export('{Path(fpath_df_image).relative_to('quarto_report').as_posix()}'," + " max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n" ) # Use helper method to add centered image content - dataframe_content.append(self._generate_image_content(df_image)) + dataframe_content.append(self._generate_image_content(fpath_df_image)) else: # Append code to display the DataFrame interactively dataframe_content.append( @@ -961,10 +1013,13 @@ def _generate_component_imports(self, component: r.Component) -> List[str]: "import json", ], }, - "dataframe": [ + "static_dataframe": [ "import pandas as pd", - "from itables import show, init_notebook_mode", "import dataframe_image as dfi", + ], + "interactive_dataframe": [ + "import pandas as pd", + "from itables import show, init_notebook_mode", "init_notebook_mode(all_interactive=True)", ], "markdown": ["import IPython.display as display", "import requests"], @@ -980,7 +1035,10 @@ def _generate_component_imports(self, component: r.Component) -> List[str]: if plot_type in components_imports["plot"]: component_imports.extend(components_imports["plot"][plot_type]) elif component_type == r.ComponentType.DATAFRAME: - component_imports.extend(components_imports["dataframe"]) + if self.is_report_static: + component_imports.extend(components_imports["static_dataframe"]) + else: + component_imports.extend(components_imports["interactive_dataframe"]) elif component_type == r.ComponentType.MARKDOWN: component_imports.extend(components_imports["markdown"]) diff --git a/src/vuegen/streamlit_reportview.py b/src/vuegen/streamlit_reportview.py index eac846d..8ed3a8e 100644 --- a/src/vuegen/streamlit_reportview.py +++ b/src/vuegen/streamlit_reportview.py @@ -5,10 +5,10 @@ from pathlib import Path from typing import List -import pandas as pd from streamlit.web import cli as stcli from . import report as r +from . import table_utils from .utils import create_folder, generate_footer, get_relative_file_path, is_url from .utils.variables import make_valid_identifier @@ -721,13 +721,7 @@ def _generate_dataframe_content(self, dataframe) -> List[str]: ) # Mapping of file extensions to read functions - read_function_mapping = { - r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, - r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, - r.DataFrameFormat.TXT.value_with_dot: pd.read_table, - r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, - r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel, - } + read_function_mapping = table_utils.read_function_mapping try: # Check if the file extension matches any DataFrameFormat value @@ -738,19 +732,47 @@ def _generate_dataframe_content(self, dataframe) -> List[str]: self.report.logger.error( f"Unsupported file extension: {file_extension}. Supported extensions are: {', '.join(fmt.value for fmt in r.DataFrameFormat)}." ) - - # Load the DataFrame using the correct function - read_function = read_function_mapping[file_extension] + # return [] # Skip execution if unsupported file extension + # Should it not return here? Can we even call the method with an unsupported file extension? # Build the file path (URL or local file) if is_url(dataframe.file_path): df_file_path = dataframe.file_path else: df_file_path = get_relative_file_path(dataframe.file_path) - dataframe_content.append( - f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n""" - ) + if file_extension in [ + r.DataFrameFormat.XLS.value_with_dot, + r.DataFrameFormat.XLSX.value_with_dot, + ]: + dataframe_content.append("selected_sheet = 0") + sheet_names = table_utils.get_sheet_names(dataframe.file_path) + if len(sheet_names) > 1: + # If there are multiple sheets, ask the user to select one + + dataframe_content.append( + textwrap.dedent( + f"""\ + sheet_names = table_utils.get_sheet_names("{dataframe.file_path}") + selected_sheet = st.selectbox("Select a sheet to display", options=sheet_names) + """ + ) + ) + + # Load the DataFrame using the correct function + read_function = read_function_mapping[file_extension] + if file_extension in [ + r.DataFrameFormat.XLS.value_with_dot, + r.DataFrameFormat.XLSX.value_with_dot, + ]: + dataframe_content.append( + f"""df = pd.{read_function.__name__}('{dataframe.file_path}', sheet_name=selected_sheet)\n""" + ) + else: + dataframe_content.append( + f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n""" + ) + # ! Alternative to select box: iterate over sheets in DataFrame # Displays a DataFrame using AgGrid with configurable options. dataframe_content.append( """ @@ -1169,6 +1191,7 @@ def _generate_component_imports(self, component: r.Component) -> List[str]: "dataframe": [ "import pandas as pd", "from st_aggrid import AgGrid, GridOptionsBuilder", + "from vuegen import table_utils", ], "markdown": ["import requests"], "chatbot": ["import time", "import json", "import requests"], diff --git a/src/vuegen/table_utils.py b/src/vuegen/table_utils.py new file mode 100644 index 0000000..b25f413 --- /dev/null +++ b/src/vuegen/table_utils.py @@ -0,0 +1,30 @@ +import pandas as pd + +from . import report as r + +# Mapping of file extensions to read functions +read_function_mapping = { + r.DataFrameFormat.CSV.value_with_dot: pd.read_csv, + r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet, + r.DataFrameFormat.TXT.value_with_dot: pd.read_table, + r.DataFrameFormat.XLS.value_with_dot: pd.read_excel, + r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel, +} + + +def get_sheet_names( + file_path: str, +) -> list[str]: + """Get the sheet names of an Excel file. + + Parameters + ---------- + file_path : str + Path to the Excel file. + + Returns + ------- + list[str] + List of sheet names. + """ + return pd.ExcelFile(file_path).sheet_names