Commit f518170

Implement feature to load components (plots, dataframes, and md files) from URLs provided in the config in streamlit and quarto reports
1 parent b366f70 commit f518170

5 files changed: +279 additions, -111 deletions

report_config_micw2graph.yaml

Lines changed: 7 additions & 7 deletions
@@ -20,21 +20,21 @@ sections:
        - title: "Top 5 species by biome (plotly)"
          component_type: "plot"
          plot_type: "plotly"
-         file_path: "example_data/MicW2Graph/top_species_plot_biome.json"
+         file_path: "https://raw.githubusercontent.com/JosefineTM/DemProt_report/main/example_data/MicW2Graph/top_species_plot_biome.json"
          caption: "Optional caption"
        - title: "Multiline plot (altair)"
          component_type: "plot"
          plot_type: "altair"
          file_path: "example_data/altair_multilineplot.json"
        - title: "Abundance data for all studies (csv)"
          component_type: "dataframe"
-         file_path: "example_data/MicW2Graph/abundance_data_allbiomes.csv"
+         file_path: "https://raw.githubusercontent.com/JosefineTM/DemProt_report/main/example_data/MicW2Graph/abundance_data_allbiomes.csv"
          file_format: "csv"
          delimiter: ","
-       - title: "Abundance data for all studies (excel)"
+       - title: "Abundance data for all studies (xls)"
          component_type: "dataframe"
          file_path: "example_data/MicW2Graph/abundance_data_allbiomes.xls"
-         file_format: "excel"
+         file_format: "xls"
          delimiter: ","
    - title: "Sample data"
      components:
@@ -58,7 +58,7 @@ sections:
      components:
        - title: "Markdown example"
          component_type: "markdown"
-         file_path: "example_data/test_md.md"
+         file_path: "https://raw.githubusercontent.com/JosefineTM/DemProt_report/main/example_data/test_md.md"
  - title: "Microbial Association Networks"
    subsections:
      - title: "Network Visualization1"
@@ -76,10 +76,10 @@ sections:
          file_path: "example_data/MicW2Graph/man_example.csv"
      - title: "Network Visualization3"
        components:
-         - title: "Network3 (cyjs)"
+         - title: "Network3 (remote html)"
            component_type: "plot"
            plot_type: "interactive_network"
-           file_path: "example_data/MicW2Graph/man_example.cyjs"
+           file_path: "https://rawcdn.githack.com/JosefineTM/DemProt_report/3fca83b18e8b0666dff8b03fcdcf3d05d0d2541a/example_data/network1.html"
      - title: "Network Visualization4"
        components:
          - title: "Network4 (html)"

vuegen/quarto_reportview.py

Lines changed: 79 additions & 46 deletions
@@ -3,7 +3,8 @@
 import report as r
 from typing import List
 import networkx as nx
-from utils import create_folder
+import pandas as pd
+from utils import create_folder, is_url

 class QuartoReportView(r.ReportView):
     """
@@ -329,23 +330,39 @@ def _generate_plot_code(self, plot, output_file = "") -> str:
        str
            The generated plot code as a string.
        """
-        # Start with the common data loading code
+        # Initialize plot code with common structure
        plot_code = f"""```{{python}}
#| label: '{plot.title}'
#| fig-cap: ""
+"""
+        # If the file path is a URL, generate code to fetch content via requests
+        if is_url(plot.file_path):
+            plot_code += f"""
+response = requests.get('{plot.file_path}')
+response.raise_for_status()
+plot_json = response.text\n"""
+        else: # If it's a local file
+            plot_code += f"""
with open('{os.path.join("..", plot.file_path)}', 'r') as plot_file:
-    plot_data = plot_file.read()
-"""
+    plot_json = plot_file.read()\n"""
        # Add specific code for each visualization tool
        if plot.plot_type == r.PlotType.PLOTLY:
-            plot_code += """fig_plotly = pio.from_json(plot_data)
-fig_plotly.update_layout(width=950, height=500)
-"""
+            plot_code += """
+fig_plotly = pio.from_json(plot_json)
+fig_plotly.update_layout(width=950, height=500)\n"""
        elif plot.plot_type == r.PlotType.ALTAIR:
-            plot_code += """fig_altair = alt.Chart.from_json(plot_data).properties(width=900, height=400)"""
+            plot_code += """fig_altair = alt.Chart.from_json(plot_json).properties(width=900, height=400)"""
        elif plot.plot_type == r.PlotType.INTERACTIVE_NETWORK:
-            plot_code = f"""<div style="text-align: center;">
-<iframe src="{os.path.join("..", output_file)}" alt="{plot.title} plot" width="800px" height="630px"></iframe>
+            # Generate the HTML embedding for interactive networks
+            if is_url(plot.file_path) and plot.file_path.endswith('.html'):
+                iframe_src = output_file
+            else:
+                iframe_src = os.path.join("..", output_file)
+
+            # Embed the HTML file in an iframe
+            plot_code = f"""
+<div style="text-align: center;">
+<iframe src="{iframe_src}" alt="{plot.title} plot" width="800px" height="630px"></iframe>
</div>\n"""
        return plot_code
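
For example, with the remote Plotly entry from the config above, the generated cell would look roughly like the following (illustrative; only the part produced by this method is shown):

```{python}
#| label: 'Top 5 species by biome (plotly)'
#| fig-cap: ""

response = requests.get('https://raw.githubusercontent.com/JosefineTM/DemProt_report/main/example_data/MicW2Graph/top_species_plot_biome.json')
response.raise_for_status()
plot_json = response.text

fig_plotly = pio.from_json(plot_json)
fig_plotly.update_layout(width=950, height=500)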

@@ -365,46 +382,47 @@ def _generate_dataframe_content(self, dataframe, is_report_static) -> List[str]:
        list : List[str]
            The list of content lines for the DataFrame.
        """
-        datframe_content = []
+        dataframe_content = []
        # Add title
-        datframe_content.append(f'### {dataframe.title}')
+        dataframe_content.append(f'### {dataframe.title}')

        # Append header for DataFrame loading
-        datframe_content.append(f"""```{{python}}
+        dataframe_content.append(f"""```{{python}}
#| label: '{dataframe.title}'
#| fig-cap: ""
""")
+        # Mapping of file extensions to read functions
+        read_function_mapping = {
+            r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
+            r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
+            r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
+            r.DataFrameFormat.XLS.value_with_dot: pd.read_excel
+        }
        try:
-            if dataframe.file_format == r.DataFrameFormat.CSV:
-                if dataframe.delimiter:
-                    datframe_content.append(f"""df = pd.read_csv('{os.path.join("..", dataframe.file_path)}', delimiter='{dataframe.delimiter}')""")
-                    datframe_content.extend(self._show_dataframe(dataframe, is_report_static))
-                else:
-                    datframe_content.append(f"""df = pd.read_csv('{os.path.join("..", dataframe.file_path)}')""")
-                    datframe_content.extend(self._show_dataframe(dataframe, is_report_static))
-            elif dataframe.file_format == r.DataFrameFormat.PARQUET:
-                datframe_content.append(f"""df = pd.read_parquet('{os.path.join("..", dataframe.file_path)}')""")
-                datframe_content.extend(self._show_dataframe(dataframe, is_report_static))
-            elif dataframe.file_format == r.DataFrameFormat.TXT:
-                datframe_content.append(f"""df = pd.read_csv('{os.path.join("..", dataframe.file_path)}', sep='\\t')""")
-                datframe_content.extend(self._show_dataframe(dataframe, is_report_static))
-            elif dataframe.file_format == r.DataFrameFormat.EXCEL:
-                datframe_content.append(f"""df = pd.read_excel('{os.path.join("..", dataframe.file_path)}')""")
-                datframe_content.extend(self._show_dataframe(dataframe, is_report_static))
-            else:
-                self.report.logger.error(f"Unsupported DataFrame file format: {dataframe.file_format}")
-                raise ValueError(f"Unsupported DataFrame file format: {dataframe.file_format}")
+            # Check if the file extension matches any DataFrameFormat value
+            file_extension = os.path.splitext(dataframe.file_path)[1].lower()
+            if not any(file_extension == fmt.value_with_dot for fmt in r.DataFrameFormat):
+                self.report.logger.error(f"Unsupported file extension: {file_extension}. Supported extensions are: {', '.join(fmt.value for fmt in r.DataFrameFormat)}.")
+
+            # Build the file path (URL or local file)
+            file_path = dataframe.file_path if is_url(dataframe.file_path) else os.path.join("..", dataframe.file_path)
+
+            # Load the DataFrame using the correct function
+            read_function = read_function_mapping[file_extension]
+            dataframe_content.append(f"""df = pd.{read_function.__name__}('{file_path}')""")
+
+            # Display the dataframe
+            dataframe_content.extend(self._show_dataframe(dataframe, is_report_static))

        except Exception as e:
            self.report.logger.error(f"Error generating content for DataFrame: {dataframe.title}. Error: {str(e)}")
            raise
        # Add caption if available
        if dataframe.caption:
-            datframe_content.append(f'>{dataframe.caption}\n')
+            dataframe_content.append(f'>{dataframe.caption}\n')

        self.report.logger.info(f"Successfully generated content for DataFrame: '{dataframe.title}'")
-        return datframe_content
+        return dataframe_content

    def _generate_markdown_content(self, markdown) -> List[str]:
        """
@@ -425,13 +443,25 @@ def _generate_markdown_content(self, markdown) -> List[str]:
        markdown_content.append(f'### {markdown.title}')

        try:
-            markdown_content.append(f"""```{{python}}
+            # Initialize md code with common structure
+            markdown_content.append(f"""
+```{{python}}
#| label: '{markdown.title}'
-#| fig-cap: ""
+#| fig-cap: ""\n""")
+            # If the file path is a URL, generate code to fetch content via requests
+            if is_url(markdown.file_path):
+                markdown_content.append(f"""
+response = requests.get('{markdown.file_path}')
+response.raise_for_status()
+markdown_content = response.text\n""")
+            else: #If it's a local file
+                markdown_content.append(f"""
with open('{os.path.join("..", markdown.file_path)}', 'r') as markdown_file:
-    markdown_content = markdown_file.read()
-display.Markdown(markdown_content)
-```\n""")
+    markdown_content = markdown_file.read()\n""")
+
+            # Code to display md content
+            markdown_content.append(f"""display.Markdown(markdown_content)\n```\n""")
+
        except Exception as e:
            self.report.logger.error(f"Error generating content for Markdown: {markdown.title}. Error: {str(e)}")
            raise
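
For the remote Markdown entry in the config above, the emitted cell would be roughly:

```{python}
#| label: 'Markdown example'
#| fig-cap: ""

response = requests.get('https://raw.githubusercontent.com/JosefineTM/DemProt_report/main/example_data/test_md.md')
response.raise_for_status()
markdown_content = response.text
display.Markdown(markdown_content)
```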
@@ -450,7 +480,7 @@ def _generate_image_content(self, image_path: str, alt_text: str = "", width: in
        Parameters
        ----------
        image_path : str
-            Path to the image file.
+            Path to the image file or a URL to the image.
        width : int, optional
            Width of the image in pixels (default is 650).
        height : int, optional
@@ -463,8 +493,11 @@ def _generate_image_content(self, image_path: str, alt_text: str = "", width: in
        str
            The formatted image content.
        """
-        return f"""
-![{alt_text}]({os.path.join('..', image_path)}){{ width={width}px height={height}px fig-align="center"}}\n"""
+        # Check if the image path is a URL or a local file path
+        if is_url(image_path):
+            return f"""![{alt_text}]({image_path}){{ width={width}px height={height}px fig-align="center"}}\n"""
+        else:
+            return f"""![{alt_text}]({os.path.join('..', image_path)}){{ width={width}px height={height}px fig-align="center"}}\n"""

    def _show_dataframe(self, dataframe, is_report_static, static_dir: str = STATIC_FILES_DIR) -> List[str]:
        """
@@ -517,11 +550,11 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
        # Dictionary to hold the imports for each component type
        components_imports = {
            'plot': {
-                r.PlotType.ALTAIR: ['import altair as alt'],
-                r.PlotType.PLOTLY: ['import plotly.io as pio']
+                r.PlotType.ALTAIR: ['import altair as alt', 'import requests'],
+                r.PlotType.PLOTLY: ['import plotly.io as pio', 'import requests']
            },
            'dataframe': ['import pandas as pd', 'from itables import show', 'import dataframe_image as dfi'],
-            'markdown': ['import IPython.display as display']
+            'markdown': ['import IPython.display as display', 'import requests']
        }

        # Iterate over sections and subsections to determine needed imports
vuegen/report.py

Lines changed: 34 additions & 27 deletions
@@ -11,7 +11,7 @@
 import json
 import matplotlib.pyplot as plt
 from pyvis.network import Network
-from utils import cyjs_to_networkx, pyvishtml_to_networkx
+from utils import cyjs_to_networkx, pyvishtml_to_networkx, fetch_file_stream

 class ReportType(StrEnum):
     STREAMLIT = auto()
@@ -58,7 +58,12 @@ class DataFrameFormat(StrEnum):
    CSV = auto()
    TXT = auto()
    PARQUET = auto()
-    EXCEL = auto()
+    XLS = auto()
+
+    @property
+    def value_with_dot(self):
+        """Return the file extension with the dot."""
+        return f".{self.name.lower()}"

@dataclass
class Component():
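
The new value_with_dot property returns the lowercase member name with a leading dot, which is what the extension maps in the dataframe and network loaders key on. A quick illustration (assuming StrEnum with auto(), so member values are the lowercase names):

# Illustrative usage of the new property
DataFrameFormat.CSV.value_with_dot   # ".csv"
DataFrameFormat.XLS.value_with_dot   # ".xls"
DataFrameFormat.PARQUET.value        # "parquet" (plain value, no dot)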
@@ -134,52 +139,55 @@ def read_network(self) -> nx.Graph:
        -------
        G : networkx.Graph
            A NetworkX graph object created from the specified network file.
+
+        Raises
+        ------
+        ValueError
+            If the file format is unsupported.
+        FileNotFoundError
+            If the file cannot be accessed or is missing.
+        RuntimeError
+            If there is an error while reading the network file.
        """
-        # Mapping of file extensions to NetworkX loading functions
+        # Mapping of file extensions to NetworkX and custom loading functions
        file_extension_map = {
            NetworkFormat.GML.value_with_dot: nx.read_gml,
            NetworkFormat.GRAPHML.value_with_dot: nx.read_graphml,
            NetworkFormat.GEXF.value_with_dot: nx.read_gexf,
            NetworkFormat.CYJS.value_with_dot: cyjs_to_networkx
        }

-        # Check if the file exists
-        if not os.path.isfile(self.file_path):
-            self.logger.error(f"File not found or cannot be accessed: {self.file_path}.")
-            raise FileNotFoundError(f"The file at {self.file_path} was not found or cannot be accessed.")
-
-        # Determine the file extension and check if it is supported
-        file_extension = os.path.splitext(self.file_path)[-1].lower()
-
-        # Check if the file extension matches any Enum value
-        if not any(file_extension == fmt.value_with_dot for fmt in NetworkFormat):
-            self.logger.error(f"Unsupported file extension: {file_extension}.")
-            raise ValueError(
-                f"Unsupported file extension: {file_extension}. Supported extensions are: "
-                f"{', '.join(fmt.value for fmt in NetworkFormat)}."
-            )
-
        # Handle .csv and .txt files with custom delimiters based on the text format (edgelist or adjlist)
        try:
-            # Handle HTML files (for pyvis interactive networks)
+            # Fetch the file stream (local or URL) using fetch_file_stream
+            file_stream = fetch_file_stream(self.file_path)
+
+            # Determine the file extension and check if it is supported
+            file_extension = os.path.splitext(self.file_path)[-1].lower()
+
+            # Check if the file extension matches any Enum value
+            if not any(file_extension == fmt.value_with_dot for fmt in NetworkFormat):
+                self.logger.error(f"Unsupported file extension: {file_extension}. Supported extensions are: {', '.join(fmt.value for fmt in NetworkFormat)}.")
+
+            # Handle HTML files for pyvis interactive networks
            if file_extension == NetworkFormat.HTML.value_with_dot:
-                G = pyvishtml_to_networkx(self.file_path)
+                G = pyvishtml_to_networkx(file_stream)
                return (G, self.file_path)

-            # Handle .csv and .txt files with custom delimiters based on the text format (edgelist or adjlist)
+            # Handle CSV and TXT files with custom delimiters based on the text format (edgelist or adjlist)
            if file_extension in [NetworkFormat.CSV.value_with_dot, NetworkFormat.TXT.value_with_dot] and self.csv_network_format:
                delimiter = ',' if file_extension == '.csv' else '\\t'
                try:
-                    df_net = pd.read_csv(self.file_path, delimiter=delimiter)
+                    df_net = pd.read_csv(file_stream, delimiter=delimiter)
                except pd.errors.ParserError:
                    self.logger.error(f"Error parsing CSV/TXT file {self.file_path}. Please check the file format or delimiter.")
-                    raise ValueError(f"Error parsing the file {self.file_path}. Please check the file format or delimiter.")

                if self.csv_network_format == CSVNetworkFormat.EDGELIST:
                    # Assert that "source" and "target" columns are present in the DataFrame
                    required_columns = {"source", "target"}
                    if not required_columns.issubset(df_net.columns):
-                        self.logger.warning(f"CSV network file must contain columns named 'source' and 'target'. Missing columns: {', '.join(required_columns.difference(df_net.columns))}.")
+                        missing_cols = ", ".join(required_columns.difference(df_net.columns))
+                        self.logger.error(f"CSV network file must contain 'source' and 'target' columns. Missing columns: {missing_cols}.")

                    # Use additional columns as edge attributes, excluding "source" and "target"
                    edge_attributes = [col for col in df_net.columns if col not in required_columns]
@@ -194,10 +202,9 @@ def read_network(self) -> nx.Graph:
                    return G
                else:
                    self.logger.error(f"Unsupported format for CSV/TXT file: {self.csv_network_format}.")
-                    raise ValueError(f"Unsupported format for CSV/TXT file: {self.csv_network_format}")

            # Handle other formats using the mapping and return the NetworkX graph object from the specified network file
-            G = file_extension_map[file_extension](self.file_path)
+            G = file_extension_map[file_extension](file_stream)
            G = self._add_size_attribute(G)
            self.logger.info(f"Successfully read network from file: {self.file_path}.")
            return G
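
read_network now resolves its input through fetch_file_stream, another utils helper added in this commit but not shown in this excerpt. A minimal sketch, under the assumption that it returns a file-like object for both local paths and http(s) URLs fetched with requests, could be:

# Hypothetical sketch of fetch_file_stream (utils.py is not shown in this excerpt).
import io
import os
import requests

def fetch_file_stream(file_path):
    """Return a binary stream for a local path or an http(s) URL (sketch)."""
    if file_path.startswith(("http://", "https://")):
        response = requests.get(file_path)
        response.raise_for_status()
        return io.BytesIO(response.content)
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file at {file_path} was not found or cannot be accessed.")
    return open(file_path, "rb")

A stream like this can be handed to pd.read_csv or to the NetworkX readers in file_extension_map, since those accept file-like objects as well as paths.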
