Commit 9da0e5f: sqlalchemy v1 initial setup
1 parent 76a3c28

31 files changed: 515 additions, 4001 deletions

README.tests.md

Lines changed: 0 additions & 44 deletions
This file was deleted.

example.py

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
"""
databricks-sql-connector includes a SQLAlchemy dialect compatible with Databricks SQL.
It aims to be a drop-in replacement for the crflynn/sqlalchemy-databricks project, and it implements
more of the Databricks API, particularly around table reflection, Alembic usage, and data
ingestion with pandas.

Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***

Because of the extent of SQLAlchemy's capabilities, it isn't feasible to provide examples of every
usage in a single script, so we only provide a basic one here. More examples are found in our test
suite at tests/e2e/sqlalchemy/test_basic.py and in the PR that implements this change:

https://github.com/databricks/databricks-sql-python/pull/57

# What's already supported

Most of the functionality is demonstrated in the e2e tests mentioned above. The list below is
derived from those test method names:

- Create and drop tables with SQLAlchemy Core
- Create and drop tables with SQLAlchemy ORM
- Read created tables via reflection
- Modify column nullability
- Insert records manually
- Insert records with pandas.to_sql (note that this does not work for DataFrames with indexes)

This connector also aims to support Alembic for programmatic delta table schema maintenance. This
behaviour is not yet backed by integration tests, which will follow in a subsequent PR as we learn
more about customer use cases there. That said, the following behaviours have been tested manually:

- Autogenerate revisions with alembic revision --autogenerate
- Upgrade and downgrade between revisions with `alembic upgrade <revision hash>` and
  `alembic downgrade <revision hash>`

# Known Gaps
- MAP, ARRAY, and STRUCT types: this dialect can read these types out as strings. But you cannot
  define a SQLAlchemy model with databricks.sqlalchemy.dialect.types.DatabricksMap (e.g.) because
  we haven't implemented them yet.
- Constraints: with the addition of information_schema to Unity Catalog, Databricks SQL supports
  foreign key and primary key constraints. This dialect can write these constraints but the ability
  for alembic to reflect and modify them programmatically has not been tested.
"""

import os
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy import Column, String, Integer, BOOLEAN, create_engine, select

try:
    from sqlalchemy.orm import declarative_base
except ImportError:
    from sqlalchemy.ext.declarative import declarative_base

host = os.getenv("DATABRICKS_SERVER_HOSTNAME")
http_path = os.getenv("DATABRICKS_HTTP_PATH")
access_token = os.getenv("DATABRICKS_TOKEN")
catalog = os.getenv("DATABRICKS_CATALOG")
schema = os.getenv("DATABRICKS_SCHEMA")


# Extra arguments are passed untouched to the driver
# See thrift_backend.py for complete list
extra_connect_args = {
    "_tls_verify_hostname": True,
    "_user_agent_entry": "PySQL Example Script",
}

if sqlalchemy.__version__.startswith("1.3"):
    # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
    # Pass these in as connect_args instead

    conn_string = f"databricks://token:{access_token}@{host}"
    connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
    all_connect_args = {**extra_connect_args, **connect_args}
    engine = create_engine(conn_string, connect_args=all_connect_args)
else:
    engine = create_engine(
        f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
        connect_args=extra_connect_args,
    )

session = Session(bind=engine)
base = declarative_base(bind=engine)


class SampleObject(base):

    __tablename__ = "mySampleTable"

    name = Column(String(255), primary_key=True)
    episodes = Column(Integer)
    some_bool = Column(BOOLEAN)


base.metadata.create_all()

sample_object_1 = SampleObject(name="Bim Adewunmi", episodes=6, some_bool=True)
sample_object_2 = SampleObject(name="Miki Meek", episodes=12, some_bool=False)

session.add(sample_object_1)
session.add(sample_object_2)

session.commit()

# SQLAlchemy 1.3 has slightly different methods
if sqlalchemy.__version__.startswith("1.3"):
    stmt = select([SampleObject]).where(SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"]))
    output = [i for i in session.execute(stmt)]
else:
    stmt = select(SampleObject).where(SampleObject.name.in_(["Bim Adewunmi", "Miki Meek"]))
    output = [i for i in session.scalars(stmt)]

assert len(output) == 2

base.metadata.drop_all()
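
The docstring above calls out ingestion with pandas.to_sql and notes that it does not work for DataFrames with indexes. The following is a minimal sketch of that path, assuming the engine built in example.py is in scope; the DataFrame contents and the table name "sample_pandas_table" are illustrative, not part of this commit.

import pandas as pd

# Reuses the `engine` created in example.py above.
df = pd.DataFrame(
    {
        "name": ["Ira Glass", "Zoe Chace"],  # illustrative rows
        "episodes": [700, 120],
    }
)

# index=False matters here: per the docstring, DataFrames with indexes are
# not supported by this ingestion path.
df.to_sql(
    "sample_pandas_table",  # hypothetical table name
    con=engine,
    index=False,
    if_exists="append",  # creates the table if missing, otherwise appends rows
)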
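
The docstring also mentions manually tested Alembic support (autogenerate, upgrade, downgrade). Below is a hedged sketch of the "online" migration function that would typically live in an Alembic env.py, using the same DATABRICKS_* environment variables as example.py; the myapp.models module and its Base are hypothetical stand-ins for your own declarative models, not part of this commit.

import os

from alembic import context
from sqlalchemy import create_engine

from myapp.models import Base  # hypothetical: your project's declarative base

target_metadata = Base.metadata


def run_migrations_online():
    # Same URI shape as the non-1.3 branch of example.py.
    engine = create_engine(
        f"databricks://token:{os.getenv('DATABRICKS_TOKEN')}@{os.getenv('DATABRICKS_SERVER_HOSTNAME')}"
        f"?http_path={os.getenv('DATABRICKS_HTTP_PATH')}"
        f"&catalog={os.getenv('DATABRICKS_CATALOG')}&schema={os.getenv('DATABRICKS_SCHEMA')}"
    )
    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()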

pyproject.toml

Lines changed: 4 additions & 11 deletions
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "databricks-sqlalchemy"
-version = "1.0.0"
+version = "1.0.1"
 description = "Databricks SQLAlchemy plugin for Python"
 authors = ["Databricks <[email protected]>"]
 license = "Apache-2.0"
@@ -10,8 +10,8 @@ include = ["CHANGELOG.md"]
 
 [tool.poetry.dependencies]
 python = "^3.8.0"
-databricks_sql_connector_core = { version = ">=1.0.0"}
-sqlalchemy = { version = ">=2.0.21" }
+databricks_sql_connector_core = { version = ">=4.0.0"}
+sqlalchemy = { version = "^1.3.24" }
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"
@@ -33,11 +33,4 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.black]
 exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
-#
-[tool.pytest.ini_options]
-markers = {"reviewed" = "Test case has been reviewed by Databricks"}
-minversion = "6.0"
-log_cli = "false"
-log_cli_level = "INFO"
-testpaths = ["tests", "src/databricks/sqlalchemy/test_local"]
-env_files = ["test.env"]
+
Lines changed: 1 addition & 4 deletions
@@ -1,4 +1 @@
-from databricks.sqlalchemy.base import DatabricksDialect
-from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
-
-__all__ = ["TINYINT", "TIMESTAMP", "TIMESTAMP_NTZ"]
+from databricks.sqlalchemy.dialect import DatabricksDialect

src/databricks/sqlalchemy/_ddl.py

Lines changed: 0 additions & 100 deletions
This file was deleted.
