Skip to content

Commit 2b40269

Browse files
committed
More generic metadata handler
1 parent 2e158c2 commit 2b40269

File tree

2 files changed

+35
-18
lines changed

2 files changed

+35
-18
lines changed

pandas_to_postgres/copy_hdf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def __init__(
2929
self.hdf_tables = hdf_tables
3030

3131
# Info from the HDFMetadata object
32-
self.levels = hdf_meta.levels
32+
self.hdf_metadata = hdf_meta.metadata_vars
3333
self.file_name = hdf_meta.file_name
3434
self.hdf_chunksize = hdf_meta.chunksize
3535

pandas_to_postgres/utilities.py

Lines changed: 34 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -15,35 +15,52 @@
1515

1616

1717
class HDFMetadata(object):
18+
"""Collect applicable metadata from HDFStore to use when running copy"""
19+
1820
def __init__(
1921
self,
20-
file_name: str = "./data.h5",
22+
file_name: str,
2123
keys: List[str] = None,
2224
chunksize: int = 10 ** 7,
25+
metadata_attr: str = None,
26+
metadata_keys: List[str] = [],
2327
):
2428
self.file_name = file_name
2529
self.chunksize = chunksize
2630
self.sql_to_hdf = defaultdict(set)
27-
self.levels = {}
31+
self.metadata_vars = defaultdict(dict)
32+
"""
33+
Parameters
34+
----------
35+
file_name: path to hdf file to copy from
36+
keys: list of hdf keys to copy data from
37+
chunksize: maximum rows read from an hdf file into a pandas dataframe
38+
metadata_attr: location of relevant metadata in store.get_storer().attrs
39+
metadata_keys: list of keys to get from metadata store
40+
"""
2841

2942
with HDFStore(self.file_name, mode="r") as store:
3043
self.keys = keys or store.keys()
3144

32-
for key in self.keys:
33-
try:
34-
metadata = store.get_storer(key).attrs.atlas_metadata
35-
logger.info(f"Metadata: {metadata}")
36-
except AttributeError:
37-
logger.info(f"Attribute Error: Skipping {key}")
38-
continue
39-
40-
self.levels[key] = metadata["levels"]
41-
42-
sql_table = metadata.get("sql_table_name")
43-
if sql_table:
44-
self.sql_to_hdf[sql_table].add(key)
45-
else:
46-
logger.warn(f"No SQL table name found for {key}")
45+
if metadata_attr:
46+
for key in self.keys:
47+
try:
48+
metadata = store.get_storer(key).attrs[metadata_attr]
49+
logger.info(f"{key} metadata: {metadata}")
50+
except (AttributeError, KeyError):
51+
if "/meta" not in key:
52+
logger.info(f"No metadata found for key '{key}'. Skipping")
53+
continue
54+
55+
for mkey in metadata_keys:
56+
self.metadata_vars[mkey][key] = metadata.get(mkey)
57+
58+
sql_table = metadata.get("sql_table_name")
59+
60+
if sql_table:
61+
self.sql_to_hdf[sql_table].add(key)
62+
else:
63+
logger.warn(f"No SQL table name found for {key}")
4764

4865

4966
def create_file_object(df: DataFrame) -> StringIO:

0 commit comments

Comments
 (0)