11import os
22import yaml
33from pathlib import Path
4+ import report as r
45from typing import Dict , List , Union , Tuple
56
67
@@ -25,14 +26,16 @@ def infer_title_from_file_dir_name(filename: str) -> str:
2526 return title .replace ("_" , " " ).title ()
2627
2728
28- def infer_component_metadata (file : Path ) -> Dict [str , Union [str , None ]]:
29+ def infer_component_metadata (file : Path , logger = None ) -> Dict [str , Union [str , None ]]:
2930 """
3031 Infers metadata for a file, including component type, plot type, and additional fields.
3132
3233 Parameters
3334 ----------
3435 file : Path
3536 The file to analyze.
37+ logger : optional
38+ Logger to record errors and warnings.
3639
3740 Returns
3841 -------
@@ -42,42 +45,56 @@ def infer_component_metadata(file: Path) -> Dict[str, Union[str, None]]:
4245 ext = file .suffix .lower ()
4346 metadata = {}
4447
45- # Infer component type
46- if ext in [".png" , ".jpg" , ".jpeg" , ".gif" , ".html" , ".graphml" , ".gml" , ".gexf" , ".cyjs" ]:
47- metadata ["component_type" ] = "plot"
48- if ext in [".png" , ".jpg" , ".jpeg" , ".gif" ]:
49- metadata ["plot_type" ] = "static"
48+ # Infer component type and metadata
49+ if ext in [r .DataFrameFormat .CSV .value_with_dot , r .DataFrameFormat .TXT .value_with_dot ]:
50+ # Check for network-related keywords
51+ if "edgelist" in file .stem .lower ():
52+ metadata ["component_type" ] = r .ComponentType .PLOT .value
53+ metadata ["plot_type" ] = r .PlotType .INTERACTIVE_NETWORK .value
54+ metadata ["csv_network_format" ] = r .CSVNetworkFormat .EDGELIST .value
55+ elif "adjlist" in file .stem .lower ():
56+ metadata ["component_type" ] = r .ComponentType .PLOT .value
57+ metadata ["plot_type" ] = r .PlotType .INTERACTIVE_NETWORK .value
58+ metadata ["csv_network_format" ] = r .CSVNetworkFormat .ADJLIST .value
59+ else :
60+ metadata ["component_type" ] = r .ComponentType .DATAFRAME .value
61+ metadata ["file_format" ] = r .DataFrameFormat .CSV .value if ext == r .DataFrameFormat .CSV .value_with_dot else r .DataFrameFormat .TXT .value
62+ metadata ["delimiter" ] = "," if ext == r .DataFrameFormat .CSV .value_with_dot else "\\ t"
63+ elif ext in [fmt .value_with_dot for fmt in r .DataFrameFormat if fmt not in [r .DataFrameFormat .CSV , r .DataFrameFormat .TXT ]]:
64+ metadata ["component_type" ] = r .ComponentType .DATAFRAME .value
65+ metadata ["file_format" ] = next (fmt .value for fmt in r .DataFrameFormat if fmt .value_with_dot == ext )
66+ elif ext in [fmt .value_with_dot for fmt in r .NetworkFormat ]:
67+ metadata ["component_type" ] = r .ComponentType .PLOT .value
68+ if ext in [
69+ r .NetworkFormat .PNG .value_with_dot ,
70+ r .NetworkFormat .JPG .value_with_dot ,
71+ r .NetworkFormat .JPEG .value_with_dot ,
72+ r .NetworkFormat .SVG .value_with_dot ,
73+ ]:
74+ metadata ["plot_type" ] = r .PlotType .STATIC .value
5075 else :
51- metadata ["plot_type" ] = "interactive_network"
76+ metadata ["plot_type" ] = r . PlotType . INTERACTIVE_NETWORK . value
5277 elif ext == ".json" :
53- metadata ["component_type" ] = "plot"
78+ metadata ["component_type" ] = r . ComponentType . PLOT . value
5479 if "plotly" in file .stem .lower ():
55- metadata ["plot_type" ] = "plotly"
80+ metadata ["plot_type" ] = r . PlotType . PLOTLY . value
5681 elif "altair" in file .stem .lower ():
57- metadata ["plot_type" ] = "altair"
82+ metadata ["plot_type" ] = r . PlotType . ALTAIR . value
5883 else :
5984 metadata ["plot_type" ] = "unknown"
60- elif ext in [".csv" , ".txt" ]:
61- # Check for network-related keywords
62- if "edgelist" in file .stem .lower ():
63- metadata ["component_type" ] = "plot"
64- metadata ["plot_type" ] = "interactive_network"
65- metadata ["csv_network_format" ] = "edgelist"
66- elif "adjlist" in file .stem .lower ():
67- metadata ["component_type" ] = "plot"
68- metadata ["plot_type" ] = "interactive_network"
69- metadata ["csv_network_format" ] = "adjlist"
70- else :
71- metadata ["component_type" ] = "dataframe"
72- metadata ["file_format" ] = ext .lstrip ("." )
73- metadata ["delimiter" ] = "," if ext == ".csv" else "\\ t"
74- elif ext in [".xls" , ".xlsx" , ".parquet" ]:
75- metadata ["component_type" ] = "dataframe"
76- metadata ["file_format" ] = ext .lstrip ("." )
7785 elif ext == ".md" :
78- metadata ["component_type" ] = "markdown"
86+ metadata ["component_type" ] = r . ComponentType . MARKDOWN . value
7987 else :
80- metadata ["component_type" ] = "unknown"
88+ # Unified error for unsupported extensions
89+ error_msg = (
90+ f"Unsupported file extension: { ext } . "
91+ f"Supported extensions include:\n "
92+ f" - Network formats: { ', ' .join (fmt .value_with_dot for fmt in r .NetworkFormat )} \n "
93+ f" - DataFrame formats: { ', ' .join (fmt .value_with_dot for fmt in r .DataFrameFormat )} "
94+ )
95+ if logger :
96+ logger .error (error_msg )
97+ raise ValueError (error_msg )
8198
8299 return metadata
83100
0 commit comments