@@ -61,8 +61,13 @@ def load_from_dict(cls, metadata_dict, single_table_name=None):
6161 instance ._set_metadata_dict (metadata_dict , single_table_name )
6262 return instance
6363
64+ @staticmethod
65+ def _validate_infer_sdtypes (infer_sdtypes ):
66+ if not isinstance (infer_sdtypes , bool ):
67+ raise ValueError ("'infer_sdtypes' must be a boolean value." )
68+
@classmethod
def detect_from_dataframes(cls, data, infer_sdtypes=True, infer_keys='primary_and_foreign'):
    """Create a metadata object describing every table in ``data``.

    Each dataframe is registered on a new ``Metadata`` object through
    ``detect_table_from_dataframe``. Relationships between the tables are only
    detected when ``infer_keys`` is ``'primary_and_foreign'``.

    Args:
        data (dict):
            Dictionary of table names to dataframes.
        infer_sdtypes (bool):
            A boolean describing whether to infer the sdtypes of each column.
            If True it infers the sdtypes based on the data.
            If False it does not infer the sdtypes and all columns are marked as unknown.
            Defaults to True.
        infer_keys (str):
            A string describing whether to infer the primary and/or foreign keys. Options are:
                - 'primary_and_foreign': Infer the primary keys in each table,
                  and the foreign keys in other tables that refer to them
                - 'primary_only': Infer only the primary keys of each table
                - None: Do not infer any keys
            Defaults to 'primary_and_foreign'.

    Returns:
        Metadata:
            A new metadata object with the sdtypes detected from the data.

    Raises:
        ValueError:
            If ``data`` is empty or holds a value that is not a ``pandas.DataFrame``,
            if ``infer_keys`` is not one of the supported options, or if
            ``infer_sdtypes`` is not a boolean.
    """
    # Validation order matters: it decides which error the caller sees first.
    if not (data and all(isinstance(table, pd.DataFrame) for table in data.values())):
        raise ValueError('The provided dictionary must contain only pandas DataFrame objects.')

    supported_key_modes = ('primary_and_foreign', 'primary_only', None)
    if infer_keys not in supported_key_modes:
        raise ValueError(
            "'infer_keys' must be one of: 'primary_and_foreign', 'primary_only', None."
        )

    cls._validate_infer_sdtypes(infer_sdtypes)

    # Per-table detection only ever handles primary keys; foreign keys are
    # resolved afterwards across all tables.
    per_table_key_mode = None if infer_keys is None else 'primary_only'

    metadata = Metadata()
    for table_name, dataframe in data.items():
        metadata.detect_table_from_dataframe(
            table_name, dataframe, infer_sdtypes, per_table_key_mode
        )

    if infer_keys == 'primary_and_foreign':
        metadata._detect_relationships(data)

    return metadata
88114
@classmethod
def detect_from_dataframe(
    cls,
    data,
    table_name=DEFAULT_SINGLE_TABLE_NAME,
    infer_sdtypes=True,
    infer_keys='primary_only',
):
    """Create a metadata object describing a single dataframe.

    The dataframe is registered under ``table_name`` on a new ``Metadata``
    object through ``detect_table_from_dataframe``.

    Args:
        data (pandas.DataFrame):
            The dataframe to detect the metadata from.
        table_name (str):
            The name under which the table is registered in the metadata.
            Defaults to ``DEFAULT_SINGLE_TABLE_NAME``.
        infer_sdtypes (bool):
            A boolean describing whether to infer the sdtypes of each column.
            If True it infers the sdtypes based on the data.
            If False it does not infer the sdtypes and all columns are marked as unknown.
            Defaults to True.
        infer_keys (str):
            A string describing whether to infer the primary keys. Options are:
                - 'primary_only': Infer only the primary keys of each table
                - None: Do not infer any keys
            Defaults to 'primary_only'.

    Returns:
        Metadata:
            A new metadata object with the sdtypes detected from the data.

    Raises:
        ValueError:
            If ``data`` is not a ``pandas.DataFrame``, if ``infer_keys`` is not one
            of the supported options, or if ``infer_sdtypes`` is not a boolean.
    """
    # Validation order matters: it decides which error the caller sees first.
    if not isinstance(data, pd.DataFrame):
        raise ValueError('The provided data must be a pandas DataFrame object.')

    supported_key_modes = ('primary_only', None)
    if infer_keys not in supported_key_modes:
        raise ValueError("'infer_keys' must be one of: 'primary_only', None.")

    cls._validate_infer_sdtypes(infer_sdtypes)

    single_table_metadata = Metadata()
    single_table_metadata.detect_table_from_dataframe(
        table_name, data, infer_sdtypes, infer_keys
    )
    return single_table_metadata
110155
111156 def _set_metadata_dict (self , metadata , single_table_name = None ):
0 commit comments