Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.

Insight into "auto-commit mode" error #50

@robertness

Description

@robertness

I am trying to write an Airflow hook for connecting to Athena, and I'm getting this error about 'Athena JDBC connection is only supported for auto-commit mode.' Can you offer any insight into this error?

`class AthenaHook(S3Hook):
"""
Hook to work with Athena and associated data in S3 buckets.
"""

def __init__(self, s3_staging_dir, aws_conn_id):
    self.region_name = None
    self.conn = None
    self.s3_staging_dir = s3_staging_dir
    super(AthenaHook, self).__init__(aws_conn_id)

def connect(self,**kwargs):
    """
    Connect to Athena using JDBC. Credentials handled by S3hook
    """

    credentials = map(str, self.get_credentials())
    self.conn = jdbc_connect(
        s3_staging_dir=self.s3_staging_dir,
        access_key=credentials[0],
        secret_key=credentials[1],
        region_name=self.region_name
    )

def dataframe(self, query):
    """
    Pull the results of a query into a data frame

    Args:
        query(str): A SQL query to be run in Athena.

    Returns:
        (pd.DataFrame): A dataframe containing the results of the query
    """
    return pd.read_sql(query, self.conn)

class AthenaPlugin(AirflowPlugin):
name = "athena_plugin"
hooks = [AthenaHook]
`
Here is the traceback:

In [15]: hook.dataframe('SELECT * from posterior_db.strain_posterior_local LIMIT 10')

[2017-11-08 14:19:27,349] {cursor.py:193} ERROR - Failed to execute query.
Traceback (most recent call last):
File "/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pyathenajdbc/cursor.py", line 172, in execute
result_set = self._statement.executeQuery(query)
java.sql.SQLExceptionPyRaisable: java.sql.SQLException: Failed to run query

DatabaseError Traceback (most recent call last)
in ()
----> 1 hook.dataframe('SELECT * from posterior_db.strain_posterior_local LIMIT 10')

/home/rness/projects/ztl/zflow/plugins/athena_plugin.pyc in dataframe(self, query)
55 (pd.DataFrame): A dataframe containing the results of the query
56 """
---> 57 return pd.read_sql(query, self.conn)
58
59

/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pandas/io/sql.pyc in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
397 sql, index_col=index_col, params=params,
398 coerce_float=coerce_float, parse_dates=parse_dates,
--> 399 chunksize=chunksize)
400
401 try:

/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pandas/io/sql.pyc in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
1434
1435 args = _convert_params(sql, params)
-> 1436 cursor = self.execute(*args)
1437 columns = [col_desc[0] for col_desc in cursor.description]
1438

/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pandas/io/sql.pyc in execute(self, *args, **kwargs)
1407 ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"
1408 " to rollback" % (args[0], exc))
-> 1409 raise_with_traceback(ex)
1410
1411 ex = DatabaseError(

/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pandas/io/sql.pyc in execute(self, *args, **kwargs)
1403 except Exception as exc:
1404 try:
-> 1405 self.con.rollback()
1406 except Exception: # pragma: no cover
1407 ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"

/home/rness/projects/ztl/venv/lib/python2.7/site-packages/pyathenajdbc/connection.pyc in rollback(self)
148
149 def rollback(self):
--> 150 raise NotSupportedError('Athena JDBC connection is only supported for auto-commit mode.')

DatabaseError: Execution failed on sql: SELECT * from posterior_db.strain_posterior_local LIMIT 10
java.sql.SQLException: Failed to run query
unable to rollback

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions