Skip to content

Commit d714fc5

Browse files
committed
Docstrings WIP and some import cleanup
1 parent d94dd80 commit d714fc5

File tree

2 files changed

+18
-10
lines changed

2 files changed

+18
-10
lines changed

pandas_to_postgres/_base_copy.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,16 @@ def copy_from_file(self, file_object: StringIO):
117117
cur.copy_expert(sql=sql, file=file_object)
118118

119119
def data_formatting(self, df: DataFrame, functions: List[Callable] = [], **kwargs):
120+
"""
121+
Call each function in the functions list arg on the DataFrame and return the result
122+
123+
Parameters
124+
----------
125+
df: dataframe to format
126+
functions: list of functions to apply to df. Each is passed df, self as
127+
copy_obj, and all kwargs passed to data_formatting
128+
**kwargs: kwargs to pass on to each function
129+
"""
120130
for f in functions:
121131
df = f(df, copy_obj=self, **kwargs)
122132
return df

pandas_to_postgres/utilities.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import logging
22
from typing import List
3-
import pandas as pd
4-
from sqlalchemy.sql.schema import Table
5-
3+
from pandas import DataFrame, HDFStore, isna
64
from collections import defaultdict
75
from io import StringIO
86

@@ -28,7 +26,7 @@ def __init__(
2826
self.sql_to_hdf = defaultdict(set)
2927
self.levels = {}
3028

31-
with pd.HDFStore(self.file_name, mode="r") as store:
29+
with HDFStore(self.file_name, mode="r") as store:
3230
self.keys = keys or store.keys()
3331

3432
for key in self.keys:
@@ -48,7 +46,7 @@ def __init__(
4846
logger.warn(f"No SQL table name found for {key}")
4947

5048

51-
def create_file_object(df: pd.DataFrame) -> StringIO:
49+
def create_file_object(df: DataFrame) -> StringIO:
5250
"""
5351
Writes pandas dataframe to an in-memory StringIO file object. Adapted from
5452
https://gist.github.com/mangecoeur/1fbd63d4758c2ba0c470#gistcomment-2086007
@@ -59,7 +57,7 @@ def create_file_object(df: pd.DataFrame) -> StringIO:
5957
return file_object
6058

6159

62-
def df_generator(df: pd.DataFrame, chunksize: int):
60+
def df_generator(df: DataFrame, chunksize: int = 10 ** 6):
6361
"""
6462
Create a generator to iterate over chunks of a dataframe
6563
@@ -81,8 +79,8 @@ def df_generator(df: pd.DataFrame, chunksize: int):
8179

8280

8381
def cast_pandas(
84-
df: pd.DataFrame, columns: list = None, copy_obj: object = None, **kwargs
85-
) -> pd.DataFrame:
82+
df: DataFrame, columns: list = None, copy_obj: object = None, **kwargs
83+
) -> DataFrame:
8684
"""
8785
Pandas does not handle null values in integer or boolean fields out of the
8886
box, so cast fields that should be these types in the database to object
@@ -111,11 +109,11 @@ def cast_pandas(
111109
for col in columns:
112110
if str(col.type) in ["INTEGER", "BIGINT"]:
113111
df[col.name] = df[col.name].apply(
114-
lambda x: None if pd.isna(x) else int(x), convert_dtype=False
112+
lambda x: None if isna(x) else int(x), convert_dtype=False
115113
)
116114
elif str(col.type) == "BOOLEAN":
117115
df[col.name] = df[col.name].apply(
118-
lambda x: None if pd.isna(x) else bool(x), convert_dtype=False
116+
lambda x: None if isna(x) else bool(x), convert_dtype=False
119117
)
120118

121119
return df

0 commit comments

Comments
 (0)