Skip to content

Commit 04eacaf

Browse files
committed
Type hinting + docstrings for BaseCopy
1 parent 0a34ac3 commit 04eacaf

File tree

3 files changed

+46
-31
lines changed

3 files changed

+46
-31
lines changed

pandas_to_postgres/_base_copy.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,34 @@
1-
from .utilities import (
2-
create_file_object,
3-
df_generator,
4-
logger,
5-
classification_to_pandas,
6-
cast_pandas,
7-
add_level_metadata,
8-
HDFMetadata,
9-
)
10-
11-
import pandas as pd
1+
from .utilities import logger
2+
from io import StringIO
123
from sqlalchemy.schema import AddConstraint, DropConstraint
134
from sqlalchemy.exc import SQLAlchemyError
145
from sqlalchemy.sql.schema import Table
156
from sqlalchemy.engine.base import Connection
167

178

189
class BaseCopy(object):
10+
"""
11+
Parent class for all common attributes and methods for copy objects
12+
"""
13+
1914
def __init__(
2015
self,
2116
defer_sql_objs: bool = False,
22-
conn=None,
23-
table_obj=None,
24-
sql_table=None,
17+
conn: Connection = None,
18+
table_obj: Table = None,
19+
sql_table: str = None,
2520
csv_chunksize: int = 10 ** 6,
2621
):
22+
"""
23+
Parameters
24+
----------
25+
defer_sql_objs: multiprocessing has issues with passing SQLAlchemy objects, so if
26+
True, defer attributing these to the object until after pickled by Pool
27+
conn: SQLAlchemy connection managed outside of the object
28+
table_obj: SQLAlchemy object for the destination SQL Table
29+
sql_table: string of SQL table name
30+
csv_chunksize: max rows to keep in memory when generating CSV for COPY
31+
"""
2732

2833
self.rows = 0
2934
self.columns = None
@@ -46,6 +51,10 @@ def instantiate_sql_objs(self, conn, table_obj):
4651
self.foreign_keys = table_obj.foreign_key_constraints
4752

4853
def drop_pk(self):
54+
"""
55+
Drop primary key constraints on PostgreSQL table as well as CASCADE any other
56+
constraints that may rely on the PK
57+
"""
4958
logger.info(f"Dropping {self.sql_table} primary key")
5059
try:
5160
with self.conn.begin_nested():
@@ -54,10 +63,12 @@ def drop_pk(self):
5463
logger.info(f"{self.sql_table} primary key not found. Skipping")
5564

5665
def create_pk(self):
66+
"""Create primary key constraints on PostgreSQL table"""
5767
logger.info(f"Creating {self.sql_table} primary key")
5868
self.conn.execute(AddConstraint(self.primary_key))
5969

6070
def drop_fks(self):
71+
"""Drop foreign key constraints on PostgreSQL table"""
6172
for fk in self.foreign_keys:
6273
logger.info(f"Dropping foreign key {fk.name}")
6374
try:
@@ -67,6 +78,7 @@ def drop_fks(self):
6778
logger.warn(f"Foreign key {fk.name} not found")
6879

6980
def create_fks(self):
81+
"""Create foreign key constraints on PostgreSQL table"""
7082
for fk in self.foreign_keys:
7183
try:
7284
logger.info(f"Creating foreign key {fk.name}")
@@ -75,14 +87,17 @@ def create_fks(self):
7587
logger.warn(f"Error creating foreign key {fk.name}")
7688

7789
def truncate(self):
90+
"""TRUNCATE PostgreSQL table"""
7891
logger.info(f"Truncating {self.sql_table}")
7992
self.conn.execute(f"TRUNCATE TABLE {self.sql_table};")
8093

8194
def analyze(self):
95+
"""Run ANALYZE on PostgreSQL table"""
8296
logger.info(f"Analyzing {self.sql_table}")
8397
self.conn.execute(f"ANALYZE {self.sql_table};")
8498

85-
def copy_from_file(self, file_object):
99+
def copy_from_file(self, file_object: StringIO):
100+
"""COPY to PostgreSQL table using StringIO CSV object"""
86101
cur = self.conn.connection.cursor()
87102
cols = ", ".join([f"{col}" for col in self.columns])
88103
sql = f"COPY {self.sql_table} ({cols}) FROM STDIN WITH CSV HEADER FREEZE"

pandas_to_postgres/copy_df.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(
2323
csv_chunksize: int = 10 ** 6,
2424
levels: dict = None,
2525
):
26-
BaseCopy(defer_sql_objs, conn, table_obj, csv_chunksize)
26+
super().__init__(defer_sql_objs, conn, table_obj, csv_chunksize)
2727

2828
self.df = df
2929
self.levels = levels

pandas_to_postgres/copy_hdf.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
)
1010

1111
from ._base_copy import BaseCopy
12-
12+
from typing import List
1313
import pandas as pd
1414
from sqlalchemy.sql.schema import Table
1515
from sqlalchemy.engine.base import Connection
@@ -18,15 +18,15 @@
1818
class HDFTableCopy(BaseCopy):
1919
def __init__(
2020
self,
21-
hdf_tables: list,
21+
hdf_tables: List[str],
2222
hdf_meta: HDFMetadata,
2323
defer_sql_objs: bool = False,
24-
conn=None,
25-
table_obj=None,
26-
sql_table=None,
24+
conn: Connection = None,
25+
table_obj: Table = None,
26+
sql_table: str = None,
2727
csv_chunksize: int = 10 ** 6,
2828
):
29-
BaseCopy.__init__(
29+
super().__init__(
3030
self, defer_sql_objs, conn, table_obj, sql_table, csv_chunksize
3131
)
3232

@@ -86,15 +86,15 @@ def hdf_to_pg(self):
8686
class ClassificationHDFTableCopy(HDFTableCopy):
8787
def __init__(
8888
self,
89-
hdf_tables: list,
89+
hdf_tables: List[str],
9090
hdf_meta: HDFMetadata,
9191
defer_sql_objs: bool = False,
92-
conn=None,
93-
table_obj=None,
92+
conn: Connection = None,
93+
table_obj: Table = None,
9494
sql_table: str = None,
9595
csv_chunksize: int = 10 ** 6,
9696
):
97-
HDFTableCopy.__init__(
97+
super().__init__(
9898
self,
9999
hdf_tables,
100100
hdf_meta,
@@ -136,15 +136,15 @@ def hdf_to_pg(self):
136136
class BigHDFTableCopy(HDFTableCopy):
137137
def __init__(
138138
self,
139-
hdf_tables: list,
139+
hdf_tables: List[str],
140140
hdf_meta: HDFMetadata,
141141
defer_sql_objs: bool = False,
142-
conn=None,
143-
table_obj=None,
144-
sql_table=None,
142+
conn: Connection = None,
143+
table_obj: Table = None,
144+
sql_table: str = None,
145145
csv_chunksize: int = 10 ** 6,
146146
):
147-
HDFTableCopy.__init__(
147+
super().__init__(
148148
self,
149149
hdf_tables,
150150
hdf_meta,

0 commit comments

Comments
 (0)