Skip to content

Commit e67dde1

Browse files
authored
Rewrite the Drill DB-API implementation using ijson. (#69)
1 parent 26da485 commit e67dde1

File tree

6 files changed

+532
-368
lines changed

6 files changed

+532
-368
lines changed

setup.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
long_description = f.read()
3030

3131
setup(name='sqlalchemy_drill',
32-
version='1.0.0',
32+
version='1.1.0',
3333
description="Apache Drill for SQLAlchemy",
3434
long_description=long_description,
3535
long_description_content_type="text/markdown",
@@ -49,20 +49,22 @@
4949
],
5050
install_requires=[
5151
"requests",
52-
"numpy",
53-
"pandas",
52+
"ijson",
5453
"sqlalchemy"
5554
],
5655
extras_require={
5756
"jdbc": ["JPype1", "JayDeBeApi"],
5857
"odbc": ["pyodbc"],
5958
},
6059
keywords='SQLAlchemy Apache Drill',
61-
author='John Omernik, Charles Givre, Davide Miceli, Massimo Martiradonna',
62-
author_email='john@omernik.com, cgivre@thedataist.com, davide.miceli.dap@gmail.com, massimo.martiradonna.dap@gmail.com',
60+
author='John Omernik, Charles Givre, Davide Miceli, Massimo Martiradonna'
61+
', James Turton',
62+
author_email='john@omernik.com, cgivre@thedataist.com, davide.miceli.dap'
63+
'@gmail.com, massimo.martiradonna.dap@gmail.com, james@somecomputer.xyz',
6364
license='MIT',
64-
url = 'https://github.com/JohnOmernik/sqlalchemy-drill',
65-
download_url = 'https://github.com/JohnOmernik/sqlalchemy-drill/archive/1.0.0.tar.gz',
65+
url='https://github.com/JohnOmernik/sqlalchemy-drill',
66+
download_url='https://github.com/JohnOmernik/sqlalchemy-drill/archive/'
67+
'1.1.0.tar.gz',
6668
packages=find_packages(),
6769
include_package_data=True,
6870
tests_require=['nose >= 0.11'],
@@ -76,4 +78,4 @@
7678
'drill.odbc = sqlalchemy_drill.odbc:DrillDialect_odbc',
7779
]
7880
}
79-
)
81+
)

sqlalchemy_drill/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2020
# DEALINGS IN THE SOFTWARE.
2121

22-
__version__ = '1.0.0'
22+
__version__ = '1.1.0'
2323
from sqlalchemy.dialects import registry
2424

2525
registry.register("drill", "sqlalchemy_drill.sadrill", "DrillDialect_sadrill")

sqlalchemy_drill/base.py

Lines changed: 41 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
'json': types.JSON,
5959
}
6060

61+
6162
class DrillCompiler_sadrill(compiler.SQLCompiler):
6263

6364
def default_from(self):
@@ -76,13 +77,16 @@ def visit_table(self, table, asfrom=False, **kwargs):
7677
try:
7778
fixed_schema = ""
7879
if table.schema != "":
79-
fixed_schema = ".".join(["`{i}`".format(i=i.replace('`', '')) for i in table.schema.split(".")])
80+
fixed_schema = ".".join(
81+
["`{i}`".format(i=i.replace('`', '')) for i in table.schema.split(".")])
8082
fixed_table = "{fixed_schema}.`{table_name}`".format(
81-
fixed_schema=fixed_schema,table_name=table.name.replace("`", "")
83+
fixed_schema=fixed_schema, table_name=table.name.replace(
84+
"`", "")
8285
)
8386
return fixed_table
8487
except Exception as ex:
85-
logging.error( "Error in DrillCompiler_sadrill.visit_table :: " + str(ex))
88+
logging.error(
89+
"Error in DrillCompiler_sadrill.visit_table :: " + str(ex))
8690

8791
else:
8892
return ""
@@ -151,7 +155,8 @@ class DrillIdentifierPreparer(compiler.IdentifierPreparer):
151155
)
152156

153157
def __init__(self, dialect):
154-
super(DrillIdentifierPreparer, self).__init__(dialect, initial_quote='`', final_quote='`')
158+
super(DrillIdentifierPreparer, self).__init__(
159+
dialect, initial_quote='`', final_quote='`')
155160

156161
def format_drill_table(self, schema, isFile=True):
157162
formatted_schema = ""
@@ -171,7 +176,8 @@ def format_drill_table(self, schema, isFile=True):
171176
elif isFile and num_dots == 2:
172177
# Case for file and no workspace
173178
plugin = schema_parts[0]
174-
formatted_schema = plugin + "." + schema_parts[1] + ".`" + schema_parts[2] + "`"
179+
formatted_schema = plugin + "." + \
180+
schema_parts[1] + ".`" + schema_parts[2] + "`"
175181
else:
176182
# Case for non-file plugins or incomplete schema parts
177183
for part in schema_parts:
@@ -184,7 +190,6 @@ def format_drill_table(self, schema, isFile=True):
184190
return formatted_schema
185191

186192

187-
188193
class DrillDialect(default.DefaultDialect):
189194
name = 'drilldbapi'
190195
driver = 'rest'
@@ -242,7 +247,8 @@ def create_connect_args(self, url, **kwargs):
242247
if url.password:
243248
qargs['drillpass'] = url.password
244249
except Exception as ex:
245-
logging.error("Error in DrillDialect_sadrill.create_connect_args :: " + str(ex))
250+
logging.error(
251+
"Error in DrillDialect_sadrill.create_connect_args :: " + str(ex))
246252

247253
return [], qargs
248254

@@ -274,7 +280,8 @@ def get_schema_names(self, connection, **kw):
274280
if row.SCHEMA_NAME != "cp.default" and row.SCHEMA_NAME != "INFORMATION_SCHEMA" and row.SCHEMA_NAME != "dfs.default":
275281
result.append(row.SCHEMA_NAME)
276282
except Exception as ex:
277-
logging.error(("Error in DrillDialect_sadrill.get_schema_names :: ", str(ex)))
283+
logging.error(
284+
("Error in DrillDialect_sadrill.get_schema_names :: ", str(ex)))
278285

279286
return tuple(result)
280287

@@ -312,7 +319,8 @@ def get_table_names(self, connection, schema=None, **kw):
312319
tables_names.append(myname)
313320

314321
except Exception as ex:
315-
logging.error("Error in DrillDialect_sadrill.get_table_names :: " + str(ex))
322+
logging.error(
323+
"Error in DrillDialect_sadrill.get_table_names :: " + str(ex))
316324

317325
return tuple(tables_names)
318326
else:
@@ -328,20 +336,23 @@ def get_table_names(self, connection, schema=None, **kw):
328336
tables_names.append(myname)
329337

330338
except Exception as ex:
331-
logging.error("Error in DrillDialect_sadrill.get_table_names :: " + str(ex))
339+
logging.error(
340+
"Error in DrillDialect_sadrill.get_table_names :: " + str(ex))
332341

333342
return tuple(tables_names)
334343

335344
def get_view_names(self, connection, schema=None, **kw):
336345
view_names = []
337-
curs = connection.execute("SELECT `TABLE_NAME` FROM INFORMATION_SCHEMA.views WHERE table_schema='" + schema + "'")
346+
curs = connection.execute(
347+
"SELECT `TABLE_NAME` FROM INFORMATION_SCHEMA.views WHERE table_schema='" + schema + "'")
338348
try:
339349
for row in curs:
340350
myname = row.TABLE_NAME
341351
view_names.append(myname)
342352

343353
except Exception as ex:
344-
logging.error("Error in DrillDialect_sadrill.get_view_names :: " + str(ex))
354+
logging.error(
355+
"Error in DrillDialect_sadrill.get_view_names :: " + str(ex))
345356

346357
return tuple(view_names)
347358

@@ -350,7 +361,8 @@ def has_table(self, connection, table_name, schema=None):
350361
self.get_columns(connection, table_name, schema)
351362
return True
352363
except exc.NoSuchTableError:
353-
logging.error("Error in DrillDialect_sadrill.has_table :: " + exc.NoSuchTableError)
364+
logging.error(
365+
"Error in DrillDialect_sadrill.has_table :: " + exc.NoSuchTableError)
354366
return False
355367

356368
def _check_unicode_returns(self, connection, additional_tests=None):
@@ -381,21 +393,26 @@ def get_columns(self, connection, table_name, schema=None, **kw):
381393
if plugin_type == "file" or plugin_type == "mongo":
382394
views = self.get_view_names(connection, schema)
383395

384-
385396
file_name = schema + "." + table_name
386-
quoted_file_name = self.identifier_preparer.format_drill_table(file_name, isFile=True)
397+
quoted_file_name = self.identifier_preparer.format_drill_table(
398+
file_name, isFile=True)
387399

388400
# Since MongoDB uses the ** notation, bypass that and query the data directly.
389401
if plugin_type == "mongo":
390402
print("FILE NAME:", file_name, quoted_file_name)
391-
mongo_quoted_file_name = self.identifier_preparer.format_drill_table(file_name, isFile=False)
392-
q = "SELECT `**` FROM {table_name} LIMIT 1".format(table_name=mongo_quoted_file_name)
403+
mongo_quoted_file_name = self.identifier_preparer.format_drill_table(
404+
file_name, isFile=False)
405+
q = "SELECT `**` FROM {table_name} LIMIT 1".format(
406+
table_name=mongo_quoted_file_name)
393407
elif table_name in views:
394408
logging.debug("View: ", quoted_file_name, table_name, schema)
395-
view_name = "`{schema}`.`{table_name}`".format(schema=schema, table_name=table_name)
396-
q = "SELECT * FROM {file_name} LIMIT 1".format(file_name=view_name)
409+
view_name = "`{schema}`.`{table_name}`".format(
410+
schema=schema, table_name=table_name)
411+
q = "SELECT * FROM {file_name} LIMIT 1".format(
412+
file_name=view_name)
397413
else:
398-
q = "SELECT * FROM {file_name} LIMIT 1".format(file_name=quoted_file_name)
414+
q = "SELECT * FROM {file_name} LIMIT 1".format(
415+
file_name=quoted_file_name)
399416

400417
column_metadata = connection.execute(q).cursor.description
401418

@@ -419,7 +436,8 @@ def get_columns(self, connection, table_name, schema=None, **kw):
419436
elif "SELECT " in table_name:
420437
q = "SELECT * FROM ({table_name}) LIMIT 1".format(table_name=table_name)
421438
else:
422-
quoted_schema = self.identifier_preparer.format_drill_table(schema + "." + table_name, isFile=False)
439+
quoted_schema = self.identifier_preparer.format_drill_table(
440+
schema + "." + table_name, isFile=False)
423441
q = "DESCRIBE {table_name}".format(table_name=quoted_schema)
424442
logging.debug("QUERY:" + q)
425443
query_results = connection.execute(q)
@@ -451,6 +469,6 @@ def get_plugin_type(self, connection, plugin=None):
451469
return plugin_type
452470

453471
except Exception as ex:
454-
logging.error("Error in DrillDialect_sadrill.get_plugin_type :: " + str(ex))
472+
logging.error(
473+
"Error in DrillDialect_sadrill.get_plugin_type :: " + str(ex))
455474
return False
456-

0 commit comments

Comments
 (0)