     classification_to_pandas,
     cast_pandas,
     add_level_metadata,
+    HDFMetadata,
 )

-import pandas as pd
-from sqlalchemy.schema import AddConstraint, DropConstraint
-from sqlalchemy.exc import SQLAlchemyError
-
+from ._base_copy import BaseCopy

-class HDFTableCopy(object):
-
-    rows = 0
-    columns = None
+import pandas as pd
+from sqlalchemy.sql.schema import Table
+from sqlalchemy.engine.base import Connection
+
+
+class HDFTableCopy(BaseCopy):
+    def __init__(
+        self,
+        hdf_tables: list,
+        hdf_meta: HDFMetadata,
+        defer_sql_objs: bool = False,
+        conn=None,
+        table_obj=None,
+        sql_table=None,
+        csv_chunksize: int = 10 ** 6,
+    ):
+        BaseCopy.__init__(
+            self, defer_sql_objs, conn, table_obj, sql_table, csv_chunksize
+        )

-    def __init__(self, sql_table, hdf_tables, hdf_meta, csv_chunksize=10 ** 6):
-        self.sql_table = sql_table
         self.hdf_tables = hdf_tables
-        self.csv_chunksize = csv_chunksize

         # Info from the HDFMetadata object
         self.levels = hdf_meta.levels
         self.file_name = hdf_meta.file_name
         self.hdf_chunksize = hdf_meta.chunksize

-    def table_metadata(self):
-        self.table_obj = db.metadata.tables[self.sql_table]
-        self.primary_key = self.table_obj.primary_key
-        self.foreign_keys = self.table_obj.foreign_key_constraints
-
-    def set_conn(self, conn):
-        self.conn = conn
-
-    def delete_conn(self):
-        del self.conn
-
-    def drop_pk(self):
-        logger.info(f"Dropping {self.sql_table} primary key")
-        try:
-            with self.conn.begin_nested():
-                self.conn.execute(DropConstraint(self.primary_key, cascade=True))
-        except SQLAlchemyError:
-            logger.info(f"{self.sql_table} primary key not found. Skipping")
-
-    def create_pk(self):
-        logger.info(f"Creating {self.sql_table} primary key")
-        self.conn.execute(AddConstraint(self.primary_key))
-
-    def drop_fks(self):
-        for fk in self.foreign_keys:
-            logger.info(f"Dropping foreign key {fk.name}")
-            try:
-                with self.conn.begin_nested():
-                    self.conn.execute(DropConstraint(fk))
-            except SQLAlchemyError:
-                logger.warn(f"Foreign key {fk.name} not found")
-
-    def create_fks(self):
-        for fk in self.foreign_keys:
-            try:
-                logger.info(f"Creating foreign key {fk.name}")
-                self.conn.execute(AddConstraint(fk))
-            except SQLAlchemyError:
-                logger.warn(f"Error creating foreign key {fk.name}")
-
-    def truncate(self):
-        logger.info(f"Truncating {self.sql_table}")
-        self.conn.execute(f"TRUNCATE TABLE {self.sql_table};")
-
-    def analyze(self):
-        logger.info(f"Analyzing {self.sql_table}")
-        self.conn.execute(f"ANALYZE {self.sql_table};")
-
-    def copy_from_file(self, file_object):
-        cur = self.conn.connection.cursor()
-        cols = ", ".join([f"{col}" for col in self.columns])
-        sql = f"COPY {self.sql_table} ({cols}) FROM STDIN WITH CSV HEADER FREEZE"
-        cur.copy_expert(sql=sql, file=file_object)
-
     def copy_table(self):
-        self.table_metadata()
         self.drop_fks()
         self.drop_pk()
+
+        # These need to be one transaction to use COPY FREEZE
         with self.conn.begin():
             self.truncate()
             self.hdf_to_pg()
-        self.create_pk()
-        self.create_fks()
+
+        self.create_pk()
+        self.create_fks()
         self.analyze()

     def hdf_to_pg(self):
@@ -126,8 +84,26 @@ def hdf_to_pg(self):


 class ClassificationHDFTableCopy(HDFTableCopy):
-    def __init__(self, sql_table, hdf_tables, hdf_meta, csv_chunksize=10 ** 6):
-        HDFTableCopy.__init__(self, sql_table, hdf_tables, hdf_meta, csv_chunksize)
+    def __init__(
+        self,
+        hdf_tables: list,
+        hdf_meta: HDFMetadata,
+        defer_sql_objs: bool = False,
+        conn=None,
+        table_obj=None,
+        sql_table: str = None,
+        csv_chunksize: int = 10 ** 6,
+    ):
+        HDFTableCopy.__init__(
+            self,
+            hdf_tables,
+            hdf_meta,
+            defer_sql_objs,
+            conn,
+            table_obj,
+            sql_table,
+            csv_chunksize,
+        )

     def hdf_to_pg(self):
         if self.hdf_tables is None:
@@ -158,8 +134,26 @@ def hdf_to_pg(self):


 class BigHDFTableCopy(HDFTableCopy):
-    def __init__(self, sql_table, hdf_tables, hdf_meta, csv_chunksize=10 ** 6):
-        HDFTableCopy.__init__(self, sql_table, hdf_tables, hdf_meta, csv_chunksize)
+    def __init__(
+        self,
+        hdf_tables: list,
+        hdf_meta: HDFMetadata,
+        defer_sql_objs: bool = False,
+        conn=None,
+        table_obj=None,
+        sql_table=None,
+        csv_chunksize: int = 10 ** 6,
+    ):
+        HDFTableCopy.__init__(
+            self,
+            hdf_tables,
+            hdf_meta,
+            defer_sql_objs,
+            conn,
+            table_obj,
+            sql_table,
+            csv_chunksize,
+        )

     def hdf_to_pg(self):
         if self.hdf_tables is None:
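The constructor signature shared by these classes now takes the SQL-side objects (conn, table_obj, sql_table) up front, or defers them with defer_sql_objs=True so they can be attached later. The added comment in copy_table reflects a PostgreSQL rule: COPY ... FREEZE is only permitted when the target table was created or truncated earlier in the same transaction, which is why truncate() and hdf_to_pg() share one conn.begin() block. A minimal usage sketch of the non-deferred path follows; the import path, HDFMetadata constructor arguments, connection URL, and table names are assumptions, not part of this change.

from sqlalchemy import MetaData, create_engine

# Assumed import path for the classes shown in this diff
from pandas_to_postgres import HDFMetadata, HDFTableCopy

engine = create_engine("postgresql://localhost/mydb")  # assumed URL
metadata = MetaData()
metadata.reflect(bind=engine)  # load Table objects for the existing schema

hdf_meta = HDFMetadata("./data.h5")  # assumed signature: path to the HDF store

with engine.connect() as conn:
    copier = HDFTableCopy(
        hdf_tables=["/country_year"],               # assumed HDF node names
        hdf_meta=hdf_meta,
        conn=conn,
        table_obj=metadata.tables["country_year"],  # assumed table name
        sql_table="country_year",
    )
    # Drops FKs/PK, truncates and COPYs inside one transaction, then rebuilds
    copier.copy_table()

The defer_sql_objs path presumably exists so a copier can be constructed before a connection or reflected Table is available (for example, per worker process) and have them supplied later; that wiring is not shown in this diff.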