Skip to content

Commit a1fb430

Browse files
committed
fix eating an exception
use dispose with sqlalchemy
1 parent 580be9d commit a1fb430

35 files changed

+564
-160
lines changed

build/lib/data_algebra/BigQuery.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,13 @@ def insert_table(
131131
self.drop_table(conn, table_name)
132132
else:
133133
table_exists = True
134+
# noinspection PyBroadException
134135
try:
135136
self.read_query(
136137
conn,
137138
"SELECT * FROM " + self.quote_table_name(table_name) + " LIMIT 1",
138139
)
139-
table_exists = True
140-
except Exception as e:
140+
except Exception:
141141
table_exists = False
142142
if table_exists:
143143
raise ValueError("table " + prepped_table_name + " already exists")
@@ -214,6 +214,7 @@ def example_handle():
214214
# assert os.path.isfile(credential_file)
215215
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_file
216216
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] # trigger key error if not present
217+
# noinspection PyBroadException
217218
try:
218219
data_catalog = "data-algebra-test"
219220
data_schema = "test_1"
@@ -222,5 +223,5 @@ def example_handle():
222223
).db_handle(google.cloud.bigquery.Client())
223224
db_handle.db_model.prepare_connection(db_handle.conn)
224225
return db_handle
225-
except Exception as e:
226+
except Exception:
226227
return None

build/lib/data_algebra/MySQL.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""
2+
Partial adapter of data algebra for MySQL. Not all data algebra operations are supported on this database at this time.
3+
"""
4+
5+
16
import data_algebra.data_ops
27
import data_algebra.db_model
38

@@ -54,7 +59,7 @@ class MySQLModel(data_algebra.db_model.DBModel):
5459
Assuming we are using a sqlalchemy engine as our connection.
5560
"""
5661

57-
def __init__(self, *, supports_with=False):
62+
def __init__(self):
5863
data_algebra.db_model.DBModel.__init__(
5964
self,
6065
string_type="CHAR",
@@ -80,8 +85,7 @@ def example_handle():
8085
"""
8186
# TODO: parameterize this
8287
assert have_sqlalchemy
83-
db_handle = MySQLModel().db_handle(
84-
sqlalchemy.engine.create_engine("mysql+pymysql://jmount@localhost/jmount")
85-
)
88+
db_engine = sqlalchemy.engine.create_engine("mysql+pymysql://jmount@localhost/jmount")
89+
db_handle = MySQLModel().db_handle(conn=db_engine, db_engine=db_engine)
8690
db_handle.db_model.prepare_connection(db_handle.conn)
8791
return db_handle

build/lib/data_algebra/PostgreSQL.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ def example_handle():
4343
"""
4444
# TODO: parameterize this
4545
assert have_sqlalchemy
46-
db_handle = PostgreSQLModel().db_handle(
47-
sqlalchemy.engine.create_engine(r"postgresql://johnmount@localhost/johnmount")
48-
)
46+
db_engine = sqlalchemy.engine.create_engine(r"postgresql://johnmount@localhost/johnmount")
47+
db_handle = PostgreSQLModel().db_handle(conn=db_engine, db_engine=db_engine)
4948
db_handle.db_model.prepare_connection(db_handle.conn)
5049
return db_handle

build/lib/data_algebra/SQLite.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
"""
3+
Adapt data_algebra to SQLite database.
4+
"""
5+
16
import math
27
import copy
38
import numpy
@@ -19,6 +24,10 @@
1924
# at least capture to is_bad, which appears to not be implemented
2025
# unless we call prepare connection
2126
def _sqlite_is_bad_expr(dbmodel, expression):
27+
"""
28+
Return SQL to check for bad values.
29+
"""
30+
2231
return (
2332
"is_bad("
2433
+ dbmodel.expr_to_sql(expression.args[0], want_inline_parens=False)
@@ -32,6 +41,10 @@ def _sqlite_is_bad_expr(dbmodel, expression):
3241

3342

3443
def _check_scalar_bad(x):
44+
"""
45+
Return 1 if scalar value is none or nan, else 0.
46+
"""
47+
3548
if x is None:
3649
return 1
3750
if not isinstance(x, numbers.Number):
@@ -42,13 +55,23 @@ def _check_scalar_bad(x):
4255

4356

4457
class MedianAgg:
58+
"""
59+
Aggregate as median. SQLite user class.
60+
"""
61+
4562
def __init__(self):
4663
self.collection = []
4764

4865
def step(self, value):
66+
"""
67+
Observe value
68+
"""
4969
self.collection.append(value)
5070

5171
def finalize(self):
72+
"""
73+
Return result.
74+
"""
5275
return numpy.median(self.collection)
5376

5477

@@ -75,6 +98,9 @@ def _unquote_identifier(self, s: str) -> str:
7598
return res
7699

77100
def prepare_connection(self, conn):
101+
"""
102+
Insert user functions into db.
103+
"""
78104
# https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function
79105
conn.create_function("is_bad", 1, _check_scalar_bad)
80106
saw = set()
@@ -262,6 +288,9 @@ def natural_join_to_near_sql(
262288
sql_format_options=None,
263289
left_is_first=True
264290
):
291+
"""
292+
Translate a join into SQL, converting right and full joins to replacement code (as SQLite doesn't have these).
293+
"""
265294
if join_node.node_name != "NaturalJoinNode":
266295
raise TypeError(
267296
"Expected join_node to be a data_algebra.data_ops.NaturalJoinNode)"

build/lib/data_algebra/SparkSQL.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
"""
3+
SparkSQL adapter for the data algebra.
4+
"""
5+
16
import data_algebra.data_ops
27
import data_algebra.db_model
38

@@ -36,8 +41,14 @@ def _sparksql_is_bad_expr(dbmodel, expression):
3641

3742

3843
# treat NaN as NULL, as Pandas has a hard time distinguishing the two
39-
def _sparksql_coalesce_expr(dbmodel, expression):
40-
def coalesce_step(x):
44+
def _sparksql_coalesce_expr(dbmodel, expression) -> str:
45+
"""
46+
Return coalesce expression.
47+
"""
48+
def coalesce_step(x: str) -> str:
49+
"""
50+
Return one caes of coalesce.
51+
"""
4152
assert isinstance(x, str)
4253
return f" WHEN ({x} IS NOT NULL) AND (NOT isNaN({x})) THEN {x} "
4354

@@ -89,6 +100,9 @@ def _sparksql_db_mapv(dbmodel, expression):
89100

90101

91102
class SparkConnection:
103+
"""
104+
Holder for spark conext and session as a connection (defines close).
105+
"""
92106
def __init__(self, *, spark_context, spark_session):
93107
assert have_Spark
94108
assert isinstance(spark_context, pyspark.context.SparkContext)
@@ -97,6 +111,9 @@ def __init__(self, *, spark_context, spark_session):
97111
self.spark_session = spark_session
98112

99113
def close(self):
114+
"""
115+
Stop context and release reference to context and session.
116+
"""
100117
if self.spark_context is not None:
101118
self.spark_context.stop() # probably only for local demos
102119
self.spark_context = None

build/lib/data_algebra/db_model.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Base clase for SQL adapters for data algebra.
3+
"""
4+
15
import math
26
import re
37
from collections import OrderedDict
@@ -53,6 +57,7 @@ def __init__(
5357
def __str__(self):
5458
return self.__repr__()
5559

60+
# noinspection PyUnusedLocal
5661
def _repr_pretty_(self, p, cycle):
5762
"""
5863
IPython pretty print, used at implicit print time
@@ -103,6 +108,7 @@ def _db_mean_expr(dbmodel, expression):
103108
)
104109

105110

111+
# noinspection PyUnusedLocal
106112
def _db_size_expr(dbmodel, expression):
107113
return "SUM(1)"
108114

@@ -306,9 +312,6 @@ def _db_nunique_expr(dbmodel, expression):
306312
)
307313

308314

309-
# fns that had been in bigquery_user_fns
310-
311-
312315
def _as_int64(dbmodel, expression):
313316
return (
314317
"CAST("
@@ -487,7 +490,7 @@ def _base_Sunday(dbmodel, expression):
487490
"**": _db_pow_expr,
488491
"nunique": _db_nunique_expr,
489492
"mapv": _db_mapv,
490-
# fns that had been in bigquery_user_fns
493+
# additional fns
491494
"as_int64": _as_int64,
492495
"as_str": _as_str,
493496
"trimstr": _trimstr,
@@ -588,8 +591,13 @@ def __init__(
588591
self.union_all_term_start = union_all_term_start
589592
self.union_all_term_end = union_all_term_end
590593

591-
def db_handle(self, conn):
592-
return DBHandle(db_model=self, conn=conn)
594+
def db_handle(self, conn, db_engine=None):
595+
"""
596+
597+
:param conn: database connection
598+
:param db_engine: optional sqlalchemy style engine (for closing)
599+
"""
600+
return DBHandle(db_model=self, conn=conn, db_engine=db_engine)
593601

594602
def prepare_connection(self, conn):
595603
pass
@@ -630,8 +638,8 @@ def read_query(self, conn, q):
630638
def table_exists(self, conn, table_name: str) -> bool:
631639
assert isinstance(table_name, str)
632640
q_table_name = self.quote_table_name(table_name)
633-
# noinspection PyBroadException
634641
table_exists = True
642+
# noinspection PyBroadException
635643
try:
636644
self.read_query(conn, "SELECT * FROM " + q_table_name + " LIMIT 1")
637645
except Exception:
@@ -1856,9 +1864,19 @@ def __repr__(self):
18561864

18571865

18581866
class DBHandle:
1859-
def __init__(self, *, db_model: DBModel, conn):
1867+
"""
1868+
Container for database connection handles.
1869+
"""
1870+
def __init__(self, *, db_model: DBModel, conn, db_engine=None):
1871+
"""
1872+
1873+
:param db_model: associated database model
1874+
:param conn: database connection
1875+
:param db_engine: optional sqlalchemy style engine (for closing)
1876+
"""
18601877
assert isinstance(db_model, DBModel)
18611878
self.db_model = db_model
1879+
self.db_engine = db_engine
18621880
self.conn = conn
18631881

18641882
def __enter__(self):
@@ -1924,8 +1942,21 @@ def __repr__(self):
19241942

19251943
def close(self) -> None:
19261944
if self.conn is not None:
1927-
try:
1928-
self.conn.close()
1929-
except Exception:
1930-
pass
1945+
caught = None
1946+
if self.db_engine is not None:
1947+
# sqlalchemy style handle
1948+
# noinspection PyBroadException
1949+
try:
1950+
self.db_engine.dispose()
1951+
except Exception as ex:
1952+
caught = ex
1953+
else:
1954+
# noinspection PyBroadException
1955+
try:
1956+
self.conn.close()
1957+
except Exception as ex:
1958+
caught = ex
1959+
self.db_engine = None
19311960
self.conn = None
1961+
if caught is not None:
1962+
raise ValueError('close caught: ' + str(caught))

build/lib/data_algebra/expr_rep.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ def mapv(self, value_map, default_value):
612612
"mapv", x=value_map, y=default_value, inline=False, method=True
613613
)
614614

615-
# fns that had been in bigquery_user_fns
615+
# additional fns
616616

617617
def as_int64(self):
618618
return self.__uop_expr__("as_int64")

build/lib/data_algebra/pandas_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Base clas for adapters for Pandas-like APIs
2+
Base class for adapters for Pandas-like APIs
33
"""
44

55
from abc import ABC
@@ -206,7 +206,7 @@ def populate_impl_map(data_model):
206206
a, b
207207
),
208208
"mapv": _map_v,
209-
# fns that had been in bigquery_user_fns
209+
# additonal fns
210210
# x is a pandas Series
211211
"as_int64": lambda x: x.astype("int64").copy(),
212212
"as_str": lambda x: x.astype("str").copy(),

build/lib/data_algebra/test_util.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def _run_handle_experiments(
153153
*,
154154
db_handle,
155155
data: Dict,
156-
ops: ValueError,
156+
ops: ViewRepresentation,
157157
sql_statements: Iterable[str],
158158
expect,
159159
float_tol: float = 1e-8,
@@ -164,14 +164,18 @@ def _run_handle_experiments(
164164
alter_cache: bool = True,
165165
test_direct_ops_path=False,
166166
):
167+
"""
168+
Run ops and sql_statements on db_handle, checking if result matches expect.
169+
"""
167170
assert isinstance(db_handle, data_algebra.db_model.DBHandle)
168171
assert isinstance(db_handle.db_model, data_algebra.db_model.DBModel)
172+
assert isinstance(ops, ViewRepresentation)
169173
assert db_handle.conn is not None
170174
if isinstance(db_handle.db_model, data_algebra.SQLite.SQLiteModel):
171175
test_direct_ops_path = True
172176
db_handle_key = str(db_handle.db_model)
173177
sql_statements = list(sql_statements)
174-
res_db_sql = [None] * len(sql_statements)
178+
res_db_sql = list([None] * len(sql_statements)) # extra list() wrapper for PyCharm's type checker
175179
res_db_ops = None
176180
need_to_run = True
177181
dict_keys = list(data.keys())
@@ -192,7 +196,7 @@ def mk_key(ii):
192196
except KeyError:
193197
pass
194198
need_to_run = test_direct_ops_path or numpy.any(
195-
[resi is None for resi in res_db_sql]
199+
[result_i is None for result_i in res_db_sql]
196200
)
197201
# generate any new needed results
198202
if need_to_run:
@@ -222,6 +226,7 @@ def mk_key(ii):
222226
raise ValueError(f"{db_handle} error in test " + str(caught))
223227
# check results
224228
for res in res_db_sql:
229+
assert res is not None
225230
if not equivalent_frames(
226231
res,
227232
expect,

build/lib/data_algebra/util.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def pandas_to_example_str(obj, *, local_data_model=None) -> str:
4040

4141
def table_is_keyed_by_columns(table, column_names: Iterable[str]) -> bool:
4242
"""
43+
Check if a table is keyed by a given list of column names.
4344
4445
:param table: pandas DataFrame
4546
:param column_names: list of column names
@@ -64,6 +65,11 @@ def table_is_keyed_by_columns(table, column_names: Iterable[str]) -> bool:
6465

6566
# noinspection PyBroadException
6667
def _mk_type_conversion_table():
68+
"""
69+
Build up conversion from type aliases we do not want into standard types. Eat any errors or warnings during table
70+
construction.
71+
"""
72+
6773
type_conversions_table = dict()
6874
# DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`.
6975
# To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe.

0 commit comments

Comments
 (0)