@@ -15,35 +15,52 @@


 class HDFMetadata(object):
+    """Collect applicable metadata from HDFStore to use when running copy"""
+
     def __init__(
         self,
-        file_name: str = "./data.h5",
+        file_name: str,
         keys: List[str] = None,
         chunksize: int = 10 ** 7,
+        metadata_attr: str = None,
+        metadata_keys: List[str] = [],
     ):
         self.file_name = file_name
         self.chunksize = chunksize
         self.sql_to_hdf = defaultdict(set)
-        self.levels = {}
+        self.metadata_vars = defaultdict(dict)
+        """
+        Parameters
+        ----------
+        file_name: path to hdf file to copy from
+        keys: list of hdf keys to copy data from
+        chunksize: maximum rows read from an hdf file into a pandas dataframe
+        metadata_attr: location of relevant metadata in store.get_storer().attrs
+        metadata_keys: list of keys to get from metadata store
+        """

         with HDFStore(self.file_name, mode="r") as store:
             self.keys = keys or store.keys()

-            for key in self.keys:
-                try:
-                    metadata = store.get_storer(key).attrs.atlas_metadata
-                    logger.info(f"Metadata: {metadata}")
-                except AttributeError:
-                    logger.info(f"Attribute Error: Skipping {key}")
-                    continue
-
-                self.levels[key] = metadata["levels"]
-
-                sql_table = metadata.get("sql_table_name")
-                if sql_table:
-                    self.sql_to_hdf[sql_table].add(key)
-                else:
-                    logger.warn(f"No SQL table name found for {key}")
+            if metadata_attr:
+                for key in self.keys:
+                    try:
+                        metadata = store.get_storer(key).attrs[metadata_attr]
+                        logger.info(f"{key} metadata: {metadata}")
+                    except (AttributeError, KeyError):
+                        if "/meta" not in key:
+                            logger.info(f"No metadata found for key '{key}'. Skipping")
+                        continue
+
+                    for mkey in metadata_keys:
+                        self.metadata_vars[mkey][key] = metadata.get(mkey)
+
+                    sql_table = metadata.get("sql_table_name")
+
+                    if sql_table:
+                        self.sql_to_hdf[sql_table].add(key)
+                    else:
+                        logger.warn(f"No SQL table name found for {key}")


 def create_file_object(df: DataFrame) -> StringIO:
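Taken together, the change replaces the hard-coded atlas_metadata attribute and "levels" key with caller-supplied metadata_attr and metadata_keys, collecting the results into metadata_vars keyed first by metadata key and then by HDF key. Below is a minimal sketch of how the new signature might be exercised; the pandas_to_postgres import path, the "copy_metadata" attribute name, and the sample table are assumptions for illustration, not part of this diff.

from pandas import DataFrame, HDFStore
from pandas_to_postgres import HDFMetadata  # assumed import location

# Build a small HDF file whose storer carries a metadata dict under a
# caller-chosen attribute name ("copy_metadata" is hypothetical).
with HDFStore("./example.h5", mode="w") as store:
    store.put("/cities", DataFrame({"id": [1, 2], "name": ["a", "b"]}))
    store.get_storer("/cities").attrs["copy_metadata"] = {
        "sql_table_name": "cities",
        "levels": ["country", "city"],
    }

# Point HDFMetadata at that attribute and name the metadata keys to collect.
hdf_meta = HDFMetadata(
    file_name="./example.h5",
    metadata_attr="copy_metadata",   # looked up via store.get_storer(key).attrs[...]
    metadata_keys=["levels"],        # each one lands in metadata_vars["levels"][hdf_key]
)

print(dict(hdf_meta.sql_to_hdf))      # {'cities': {'/cities'}}
print(dict(hdf_meta.metadata_vars))   # {'levels': {'/cities': ['country', 'city']}}

With the new lookup going through attrs[metadata_attr] and catching both AttributeError and KeyError, keys whose storer lacks the attribute are skipped with a log message (silently for "/meta" sub-keys), and any key exposing a "sql_table_name" entry is grouped under that table in sql_to_hdf.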