Skip to content

Commit 04eacaf

Browse files
committed
Type hinting + docstrings for BaseCopy
1 parent 0a34ac3 commit 04eacaf

File tree

3 files changed

+46
-31
lines changed

3 files changed

+46
-31
lines changed

pandas_to_postgres/_base_copy.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,34 @@
1-
from .utilities import (
2-
create_file_object,
3-
df_generator,
4-
logger,
5-
classification_to_pandas,
6-
cast_pandas,
7-
add_level_metadata,
8-
HDFMetadata,
9-
)
10-
11-
import pandas as pd
1+
from .utilities import logger
2+
from io import StringIO
123
from sqlalchemy.schema import AddConstraint, DropConstraint
134
from sqlalchemy.exc import SQLAlchemyError
145
from sqlalchemy.sql.schema import Table
156
from sqlalchemy.engine.base import Connection
167

178

189
class BaseCopy(object):
10+
"""
11+
Parent class for all common attributes and methods for copy objects
12+
"""
13+
1914
def __init__(
2015
self,
2116
defer_sql_objs: bool = False,
22-
conn=None,
23-
table_obj=None,
24-
sql_table=None,
17+
conn: Connection = None,
18+
table_obj: Table = None,
19+
sql_table: str = None,
2520
csv_chunksize: int = 10 ** 6,
2621
):
22+
"""
23+
Parameters
24+
----------
25+
defer_sql_objs: multiprocessing has issues with passing SQLAlchemy objects, so if
26+
True, defer attributing these to the object until after pickled by Pool
27+
conn: SQLAlchemy connection managed outside of the object
28+
table_obj: SQLAlchemy object for the destination SQL Table
29+
sql_table: string of SQL table name
30+
csv_chunksize: max rows to keep in memory when generating CSV for COPY
31+
"""
2732

2833
self.rows = 0
2934
self.columns = None
@@ -46,6 +51,10 @@ def instantiate_sql_objs(self, conn, table_obj):
4651
self.foreign_keys = table_obj.foreign_key_constraints
4752

4853
def drop_pk(self):
54+
"""
55+
Drop primary key constraints on PostgreSQL table as well as CASCADE any other
56+
constraints that may rely on the PK
57+
"""
4958
logger.info(f"Dropping {self.sql_table} primary key")
5059
try:
5160
with self.conn.begin_nested():
@@ -54,10 +63,12 @@ def drop_pk(self):
5463
logger.info(f"{self.sql_table} primary key not found. Skipping")
5564

5665
def create_pk(self):
66+
"""Create primary key constraints on PostgreSQL table"""
5767
logger.info(f"Creating {self.sql_table} primary key")
5868
self.conn.execute(AddConstraint(self.primary_key))
5969

6070
def drop_fks(self):
71+
"""Drop foreign key constraints on PostgreSQL table"""
6172
for fk in self.foreign_keys:
6273
logger.info(f"Dropping foreign key {fk.name}")
6374
try:
@@ -67,6 +78,7 @@ def drop_fks(self):
6778
logger.warn(f"Foreign key {fk.name} not found")
6879

6980
def create_fks(self):
81+
"""Create foreign key constraints on PostgreSQL table"""
7082
for fk in self.foreign_keys:
7183
try:
7284
logger.info(f"Creating foreign key {fk.name}")
@@ -75,14 +87,17 @@ def create_fks(self):
7587
logger.warn(f"Error creating foreign key {fk.name}")
7688

7789
def truncate(self):
90+
"""TRUNCATE PostgreSQL table"""
7891
logger.info(f"Truncating {self.sql_table}")
7992
self.conn.execute(f"TRUNCATE TABLE {self.sql_table};")
8093

8194
def analyze(self):
95+
"""Run ANALYZE on PostgreSQL table"""
8296
logger.info(f"Analyzing {self.sql_table}")
8397
self.conn.execute(f"ANALYZE {self.sql_table};")
8498

85-
def copy_from_file(self, file_object):
99+
def copy_from_file(self, file_object: StringIO):
100+
"""COPY to PostgreSQL table using StringIO CSV object"""
86101
cur = self.conn.connection.cursor()
87102
cols = ", ".join([f"{col}" for col in self.columns])
88103
sql = f"COPY {self.sql_table} ({cols}) FROM STDIN WITH CSV HEADER FREEZE"

pandas_to_postgres/copy_df.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(
2323
csv_chunksize: int = 10 ** 6,
2424
levels: dict = None,
2525
):
26-
BaseCopy(defer_sql_objs, conn, table_obj, csv_chunksize)
26+
super().__init__(defer_sql_objs, conn, table_obj, csv_chunksize)
2727

2828
self.df = df
2929
self.levels = levels

pandas_to_postgres/copy_hdf.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
)
1010

1111
from ._base_copy import BaseCopy
12-
12+
from typing import List
1313
import pandas as pd
1414
from sqlalchemy.sql.schema import Table
1515
from sqlalchemy.engine.base import Connection
@@ -18,15 +18,15 @@
1818
class HDFTableCopy(BaseCopy):
1919
def __init__(
2020
self,
21-
hdf_tables: list,
21+
hdf_tables: List[str],
2222
hdf_meta: HDFMetadata,
2323
defer_sql_objs: bool = False,
24-
conn=None,
25-
table_obj=None,
26-
sql_table=None,
24+
conn: Connection = None,
25+
table_obj: Table = None,
26+
sql_table: str = None,
2727
csv_chunksize: int = 10 ** 6,
2828
):
29-
BaseCopy.__init__(
29+
super().__init__(
3030
self, defer_sql_objs, conn, table_obj, sql_table, csv_chunksize
3131
)
3232

@@ -86,15 +86,15 @@ def hdf_to_pg(self):
8686
class ClassificationHDFTableCopy(HDFTableCopy):
8787
def __init__(
8888
self,
89-
hdf_tables: list,
89+
hdf_tables: List[str],
9090
hdf_meta: HDFMetadata,
9191
defer_sql_objs: bool = False,
92-
conn=None,
93-
table_obj=None,
92+
conn: Connection = None,
93+
table_obj: Table = None,
9494
sql_table: str = None,
9595
csv_chunksize: int = 10 ** 6,
9696
):
97-
HDFTableCopy.__init__(
97+
super().__init__(
9898
self,
9999
hdf_tables,
100100
hdf_meta,
@@ -136,15 +136,15 @@ def hdf_to_pg(self):
136136
class BigHDFTableCopy(HDFTableCopy):
137137
def __init__(
138138
self,
139-
hdf_tables: list,
139+
hdf_tables: List[str],
140140
hdf_meta: HDFMetadata,
141141
defer_sql_objs: bool = False,
142-
conn=None,
143-
table_obj=None,
144-
sql_table=None,
142+
conn: Connection = None,
143+
table_obj: Table = None,
144+
sql_table: str = None,
145145
csv_chunksize: int = 10 ** 6,
146146
):
147-
HDFTableCopy.__init__(
147+
super().__init__(
148148
self,
149149
hdf_tables,
150150
hdf_meta,

0 commit comments

Comments
 (0)