druid-io
diff --git a/‎README.md‎
Lines changed: 59 additions & 9 deletions b/‎README.md‎
Lines changed: 59 additions & 9 deletions
diff --git a/‎pydruid/console.py‎
Lines changed: 181 additions & 0 deletions b/‎pydruid/console.py‎
Lines changed: 181 additions & 0 deletions
diff --git a/‎pydruid/db/__init__.py‎
Lines changed: 37 additions & 0 deletions b/‎pydruid/db/__init__.py‎
Lines changed: 37 additions & 0 deletions
@@ -1,6 +1,8 @@
-#pydruid
-pydruid exposes a simple API to create, execute, and analyze [Druid](http://druid.io/) queries. pydruid can parse query results into [Pandas](http://pandas.pydata.org/) DataFrame objects for subsequent data analysis -- this offers a tight integration between [Druid](http://druid.io/), the [SciPy](http://www.scipy.org/stackspec.html) stack (for scientific computing) and [scikit-learn](http://scikit-learn.org/stable/) (for machine learning). Additionally, pydruid can export query results into TSV or JSON for further processing with your favorite tool, e.g., R, Julia, Matlab, Excel.
-It provides both synchronous and asynchronous clients.
+# pydruid
+
+pydruid exposes a simple API to create, execute, and analyze [Druid](http://druid.io/) queries. pydruid can parse query results into [Pandas](http://pandas.pydata.org/) DataFrame objects for subsequent data analysis -- this offers a tight integration between [Druid](http://druid.io/), the [SciPy](http://www.scipy.org/stackspec.html) stack (for scientific computing) and [scikit-learn](http://scikit-learn.org/stable/) (for machine learning). pydruid can export query results into TSV or JSON for further processing with your favorite tool, e.g., R, Julia, Matlab, Excel. It provides both synchronous and asynchronous clients.
+
+Additionally, pydruid implements the [Python DB API 2.0](https://www.python.org/dev/peps/pep-0249/), a [SQLAlchemy dialect](http://docs.sqlalchemy.org/en/latest/dialects/), and provides a command line interface to interact with Druid.
 
 To install:
 ```python
@@ -11,10 +13,15 @@ pip install pydruid[async]
 pip install pydruid[pandas]
 # or, if you intend to do both
 pip install pydruid[async, pandas]
+# or, if you want to use the SQLAlchemy engine
+pip install pydruid[sqlalchemy]
+# or, if you want to use the CLI
+pip install pydruid[cli]
 ```
 Documentation: https://pythonhosted.org/pydruid/. 
 
-#examples
+# examples
+
 The following examples show how to execute and analyze the results of three types of queries: timeseries, topN, and groupby. We will use these queries to ask simple questions about twitter's public data set.
 
 ## timeseries 
@@ -118,13 +125,13 @@ plot(g, "tweets.png", layout=layout, vertex_size=2, bbox=(400, 400), margin=25,
 
 ![alt text](https://github.com/metamx/pydruid/raw/master/docs/figures/twitter_graph.png "Social Network")
 
-#asynchronous client
+# asynchronous client
 ```pydruid.async_client.AsyncPyDruid``` implements an asynchronous client. To achieve that, it utilizes an asynchronous
 HTTP client from ```Tornado``` framework. The asynchronous client is suitable for use with async frameworks such as Tornado
 and provides much better performance at scale. It lets you serve multiple requests at the same time, without blocking on
 Druid executing your queries.
 
-##example
+## example
 ```python
 from tornado import gen
 from pydruid.async_client import AsyncPyDruid
@@ -153,7 +160,7 @@ def your_asynchronous_method_serving_top10_mentions_for_day(day
 ```
 
 
-#thetaSketches
+# thetaSketches
 Theta sketch Post aggregators are built slightly differently to normal Post Aggregators, as they have different operators.
 Note: you must have the ```druid-datasketches``` extension loaded into your Druid cluster in order to use these. 
 See the [Druid datasketches](http://druid.io/docs/latest/development/extensions-core/datasketches-aggregators.html) documentation for details.
@@ -185,5 +192,48 @@ ts = query.groupby(
             postaggregator.ThetaSketch('product_A_users') & postaggregator.ThetaSketch('product_B_users')
             )
     }
-    )
-```
+)
+```
+
+# DB API
+
+```python
+from pydruid.db import connect
+
+conn = connect(host='localhost', port=8082, path='/druid/v2/sql/', scheme='http')
+curs = conn.cursor()
+curs.execute("""
+    SELECT place,
+           CAST(REGEXP_EXTRACT(place, '(.*),', 1) AS FLOAT) AS lat,
+           CAST(REGEXP_EXTRACT(place, ',(.*)', 1) AS FLOAT) AS lon
+      FROM places
+     LIMIT 10
+""")
+for row in curs:
+    print(row)
+```
+        
+# SQLAlchemy
+
+```python
+from sqlalchemy import *
+from sqlalchemy.engine import create_engine
+from sqlalchemy.schema import *
+
+engine = create_engine('druid://localhost:8082/druid/v2/sql/')  # uses HTTP by default :(
+# engine = create_engine('druid+http://localhost:8082/druid/v2/sql/')
+# engine = create_engine('druid+https://localhost:8082/druid/v2/sql/')
+
+places = Table('places', MetaData(bind=engine), autoload=True)
+print(select([func.count('*')], from_obj=places).scalar())
+```
+
+# Command line
+
+```bash
+$ pydruid http://localhost:8082/druid/v2/sql/
+> SELECT COUNT(*) AS cnt FROM places
+  cnt
+-----
+12345
+```
@@ -0,0 +1,181 @@
+from __future__ import unicode_literals
+
+import os
+import sys
+
+from prompt_toolkit import prompt, AbortAction
+from prompt_toolkit.history import FileHistory
+from prompt_toolkit.contrib.completers import WordCompleter
+from pygments.lexers import SqlLexer
+from pygments.style import Style
+from pygments.token import Token
+from pygments.styles.default import DefaultStyle
+from six.moves.urllib import parse
+from tabulate import tabulate
+
+from pydruid.db.api import connect
+
+
+# Vocabulary offered to the SQL prompt's word completer; get_autocomplete()
+# concatenates every list below together with the table names.
+# SQL statement keywords and clauses.
+keywords = [
+    'EXPLAIN PLAN FOR',
+    'WITH',
+    'SELECT',
+    'ALL',
+    'DISTINCT',
+    'FROM',
+    'WHERE',
+    'GROUP BY',
+    'HAVING',
+    'ORDER BY',
+    'ASC',
+    'DESC',
+    'LIMIT',
+]
+
+# Aggregation function names.
+aggregate_functions = [
+    'COUNT',
+    'SUM',
+    'MIN',
+    'MAX',
+    'AVG',
+    'APPROX_COUNT_DISTINCT',
+    'APPROX_QUANTILE',
+]
+
+# Numeric function names.
+numeric_functions = [
+    'ABS',
+    'CEIL',
+    'EXP',
+    'FLOOR',
+    'LN',
+    'LOG10',
+    'POW',
+    'SQRT',
+]
+
+# String function names.
+string_functions = [
+    'CHARACTER_LENGTH',
+    'LOOKUP',
+    'LOWER',
+    'REGEXP_EXTRACT',
+    'REPLACE',
+    'SUBSTRING',
+    'TRIM',
+    'BTRIM',
+    'RTRIM',
+    'LTRIM',
+    'UPPER',
+]
+
+# Time function names.
+# NOTE: 'FLOOR' and 'CEIL' are also listed in numeric_functions.
+time_functions = [
+    'CURRENT_TIMESTAMP',
+    'CURRENT_DATE',
+    'TIME_FLOOR',
+    'TIME_SHIFT',
+    'TIME_EXTRACT',
+    'TIME_PARSE',
+    'TIME_FORMAT',
+    'MILLIS_TO_TIMESTAMP',
+    'TIMESTAMP_TO_MILLIS',
+    'EXTRACT',
+    'FLOOR',
+    'CEIL',
+]
+
+# Remaining function names and CASE-expression keywords.
+other_functions = [
+    'CAST',
+    'CASE',
+    'WHEN',
+    'THEN',
+    'END',
+    'NULLIF',
+    'COALESCE',
+]
+
+
+class DocumentStyle(Style):
+    """Style passed to the prompt: completion-menu colors plus DefaultStyle."""
+
+    # Colors for the Token.Menu.Completions.* tokens (completion entries,
+    # the currently selected entry, and the progress button/bar).
+    styles = {
+        Token.Menu.Completions.Completion.Current: 'bg:#00aaaa #000000',
+        Token.Menu.Completions.Completion: 'bg:#008888 #ffffff',
+        Token.Menu.Completions.ProgressButton: 'bg:#003333',
+        Token.Menu.Completions.ProgressBar: 'bg:#00aaaa',
+    }
+    # Merge in the rules of pygments' DefaultStyle. Because this runs after
+    # the literal above, a key present in both takes the DefaultStyle value.
+    styles.update(DefaultStyle.styles)
+
+
+def get_connection_kwargs(url):
+    """Split an endpoint URL into keyword arguments for ``connect``.
+
+    The host and port come from the parsed URL's ``hostname`` and ``port``
+    attributes instead of splitting the network location on the first ':'.
+    This keeps IPv6 literals (``http://[::1]:8082/``) and URLs carrying
+    credentials (``http://user:pw@host:8082/``) working. Note that
+    ``hostname`` is returned lower-cased by ``urlparse``.
+
+    :param url: URL of the form ``scheme://host[:port]/path``.
+    :returns: dict with the keys ``host``, ``port``, ``path`` and ``scheme``;
+        ``port`` falls back to 8082 when the URL does not specify one.
+    :raises ValueError: if the port in the URL is not a valid port number.
+    """
+    parts = parse.urlparse(url)
+
+    # ``hostname`` is None when the URL has no network location; keep the
+    # empty string so the returned value is always text.
+    host = parts.hostname or ''
+    if ':' in host:
+        # IPv6 literal: ``hostname`` drops the surrounding brackets, restore
+        # them so the host stays unambiguous next to a ':port' suffix.
+        host = '[{0}]'.format(host)
+
+    port = parts.port
+    if port is None:
+        port = 8082
+
+    return {
+        'host': host,
+        'port': port,
+        'path': parts.path,
+        'scheme': parts.scheme,
+    }
+
+
+def get_tables(connection):
+    """Return the TABLE_NAME of every row in INFORMATION_SCHEMA.TABLES.
+
+    :param connection: object whose ``cursor()`` returns a cursor; the
+        cursor's ``execute()`` result is iterated for rows.
+    :returns: list of table names, in the order the rows are produced.
+    """
+    cursor = connection.cursor()
+    rows = cursor.execute('SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES')
+
+    table_names = []
+    for row in rows:
+        table_names.append(row.TABLE_NAME)
+    return table_names
+
+
+def get_autocomplete(connection):
+    """Build the word list used for SQL completion.
+
+    Concatenates the module-level keyword/function lists with the table
+    names returned by ``get_tables(connection)``. Words that occur more than
+    once (for example 'FLOOR' and 'CEIL', which are both numeric and time
+    functions) are kept only at their first position so each completion is
+    offered once.
+
+    :param connection: connection passed through to ``get_tables``.
+    :returns: list of unique completion words in first-seen order.
+    """
+    candidates = (
+        keywords +
+        aggregate_functions +
+        numeric_functions +
+        string_functions +
+        time_functions +
+        other_functions +
+        get_tables(connection)
+    )
+
+    # Order-preserving de-duplication; written as an explicit loop so it does
+    # not rely on dict ordering guarantees.
+    seen = set()
+    words = []
+    for word in candidates:
+        if word not in seen:
+            seen.add(word)
+            words.append(word)
+    return words
+
+
+def main():
+    """Run an interactive SQL prompt against a SQL endpoint.
+
+    The endpoint URL is read from the first command line argument and
+    defaults to ``http://localhost:8082/druid/v2/sql/``. Each non-empty
+    line entered at the prompt is executed and its result printed as a
+    table; errors raised by ``execute`` are printed and the prompt
+    continues. Control-D ends the session. Input history is stored in
+    ``~/.pydruid_history``.
+    """
+    history = FileHistory(os.path.expanduser('~/.pydruid_history'))
+
+    try:
+        url = sys.argv[1]
+    except IndexError:
+        url = 'http://localhost:8082/druid/v2/sql/'
+    kwargs = get_connection_kwargs(url)
+    connection = connect(**kwargs)
+    cursor = connection.cursor()
+
+    words = get_autocomplete(connection)
+    sql_completer = WordCompleter(words, ignore_case=True)
+
+    while True:
+        try:
+            query = prompt(
+                '> ', lexer=SqlLexer, completer=sql_completer,
+                style=DocumentStyle, history=history,
+                on_abort=AbortAction.RETRY)
+        except EOFError:
+            break  # Control-D pressed.
+
+        # Strip surrounding whitespace *before* the trailing semicolons so
+        # that input such as "SELECT 1; " loses its terminator as well.
+        statement = query.strip().rstrip(';')
+        if not statement:
+            continue
+
+        # run query
+        try:
+            result = cursor.execute(statement)
+        except Exception as e:
+            # Report the failure and keep the prompt alive.
+            print(e)
+            continue
+
+        # Per the DB API, ``description`` is None when the operation did not
+        # produce a result set; fall back to no headers instead of crashing.
+        description = cursor.description or []
+        headers = [column[0] for column in description]
+        print(tabulate(result, headers=headers))
+
+    print('GoodBye!')
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,37 @@
+from pydruid.db.api import connect
+from pydruid.db.exceptions import (
+    DataError,
+    DatabaseError,
+    Error,
+    IntegrityError,
+    InterfaceError,
+    InternalError,
+    NotSupportedError,
+    OperationalError,
+    ProgrammingError,
+    Warning,
+)
+
+
+# Public names of the package: the connect() entry point, the DB API module
+# globals assigned at the bottom of this file, and the exception classes
+# imported from pydruid.db.exceptions.
+__all__ = [
+    'connect',
+    'apilevel',
+    'threadsafety',
+    'paramstyle',
+    'DataError',
+    'DatabaseError',
+    'Error',
+    'IntegrityError',
+    'InterfaceError',
+    'InternalError',
+    'NotSupportedError',
+    'OperationalError',
+    'ProgrammingError',
+    'Warning',
+]
+
+
+# DB API module globals (see PEP 249).
+# Specification level implemented by this package.
+apilevel = '2.0'
+# Threads may share the module and connections
+threadsafety = 2
+# Parameter marker style: PEP 249 'pyformat', i.e. %(name)s placeholders.
+paramstyle = 'pyformat'