1
1
import logging
2
2
from typing import List
3
- import pandas as pd
4
- from sqlalchemy .sql .schema import Table
5
-
3
+ from pandas import DataFrame , HDFStore , isna
6
4
from collections import defaultdict
7
5
from io import StringIO
8
6
@@ -28,7 +26,7 @@ def __init__(
28
26
self .sql_to_hdf = defaultdict (set )
29
27
self .levels = {}
30
28
31
- with pd . HDFStore (self .file_name , mode = "r" ) as store :
29
+ with HDFStore (self .file_name , mode = "r" ) as store :
32
30
self .keys = keys or store .keys ()
33
31
34
32
for key in self .keys :
@@ -48,7 +46,7 @@ def __init__(
48
46
logger .warn (f"No SQL table name found for { key } " )
49
47
50
48
51
- def create_file_object (df : pd . DataFrame ) -> StringIO :
49
+ def create_file_object (df : DataFrame ) -> StringIO :
52
50
"""
53
51
Writes pandas dataframe to an in-memory StringIO file object. Adapted from
54
52
https://gist.github.com/mangecoeur/1fbd63d4758c2ba0c470#gistcomment-2086007
@@ -59,7 +57,7 @@ def create_file_object(df: pd.DataFrame) -> StringIO:
59
57
return file_object
60
58
61
59
62
- def df_generator (df : pd . DataFrame , chunksize : int ):
60
+ def df_generator (df : DataFrame , chunksize : int = 10 ** 6 ):
63
61
"""
64
62
Create a generator to iterate over chunks of a dataframe
65
63
@@ -81,8 +79,8 @@ def df_generator(df: pd.DataFrame, chunksize: int):
81
79
82
80
83
81
def cast_pandas (
84
- df : pd . DataFrame , columns : list = None , copy_obj : object = None , ** kwargs
85
- ) -> pd . DataFrame :
82
+ df : DataFrame , columns : list = None , copy_obj : object = None , ** kwargs
83
+ ) -> DataFrame :
86
84
"""
87
85
Pandas does not handle null values in integer or boolean fields out of the
88
86
box, so cast fields that should be these types in the database to object
@@ -111,11 +109,11 @@ def cast_pandas(
111
109
for col in columns :
112
110
if str (col .type ) in ["INTEGER" , "BIGINT" ]:
113
111
df [col .name ] = df [col .name ].apply (
114
- lambda x : None if pd . isna (x ) else int (x ), convert_dtype = False
112
+ lambda x : None if isna (x ) else int (x ), convert_dtype = False
115
113
)
116
114
elif str (col .type ) == "BOOLEAN" :
117
115
df [col .name ] = df [col .name ].apply (
118
- lambda x : None if pd . isna (x ) else bool (x ), convert_dtype = False
116
+ lambda x : None if isna (x ) else bool (x ), convert_dtype = False
119
117
)
120
118
121
119
return df
0 commit comments