Skip to content
This repository was archived by the owner on May 5, 2022. It is now read-only.

Commit 3f878bd

Browse files
committed
chores: add Dockerfile.superset
1 parent 1af305f commit 3f878bd

File tree

4 files changed

+246
-0
lines changed

4 files changed

+246
-0
lines changed

hack/Dockerfile.superset

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# ---- base stage: runtime layer reused by the builder and the final image ----
FROM python:3.8-slim AS base
LABEL maintainer="Teko's DataOps Team <[email protected]>"
# Run every RUN instruction below through bash instead of the default sh.
SHELL ["/bin/bash", "-c"]

# Runtime shared libraries (no -dev packages), plus curl (used further down to
# download geckodriver) and locales (presumably the provider of locale-gen).
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        libbz2-1.0 liblz4-1 libsnappy1v5 zlib1g libzstd1 \
        libev4 libssl1.1 libisal2 libnss3 \
        libpq5 libmariadb3 \
        curl locales; \
    \
    apt-get clean; \
    rm -rf /var/lib/apt/lists/*

# Firefox + Gecko driver. For Ubuntu, using `apt install firefox firefox-geckodriver`
ARG GECKO_DRIVER_VERSION=v0.29.0
# The geckodriver release tarball is streamed straight into /usr/local/bin.
RUN set -eux; cd /tmp/; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        xvfb libxi6 libgconf-2-4 \
        firefox-esr; \
    apt-get clean; \
    rm -rf /var/lib/apt/lists/*; \
    curl -sSL "https://github.com/mozilla/geckodriver/releases/download/${GECKO_DRIVER_VERSION}/geckodriver-${GECKO_DRIVER_VERSION}-linux64.tar.gz" \
        | tar -xzf - -C /usr/local/bin --no-same-owner;

# Uncomment en_US.UTF-8 in /etc/locale.gen, generate it, and record it as the
# system default via update-locale.
RUN set -eux; \
    sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen; \
    locale-gen; \
    update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8;

# Unprivileged user (uid 1000) that owns the Superset home directory.
ENV SUPERSET_HOME="/opt/superset"
RUN set -eux; \
    useradd -ms "/bin/bash" --uid=1000 superset; \
    mkdir -p "${SUPERSET_HOME}"; \
    chown -R superset: "${SUPERSET_HOME}";

WORKDIR ${SUPERSET_HOME}
40+
41+
# ---- builder stage: installs Superset and its drivers with pip ----
FROM base AS builder

ARG SUPERSET_VERSION=1.0.1

# Compiler toolchain and development headers; the inline comments name the
# Python package each header package is needed for.
RUN set -eux; \
    apt-get update; \
    apt-get install -y \
        build-essential \
        # pip install pyodbc
        unixodbc-dev \
        # pip install mysqlclient
        default-libmysqlclient-dev \
        # pip install pylibmc
        libmemcached-dev \
        # pip install sasl
        libsasl2-dev;

# Install apache-superset with the extras listed in SUPERSET_PACKAGES; `join`
# turns the bash array into the comma-separated list pip expects inside [...].
# NOTE: the SQL Server extra is spelled `mssql`. pip only warns about an
# unknown extra (it does not fail), so a misspelling silently skips the driver.
RUN set -eux; \
    function join { local IFS="$1"; echo "${*:2}"; }; \
    SUPERSET_PACKAGES=( \
        # Cloud
        athena bigquery redshift \
        dremio snowflake teradata vertica exasol \
        # Database
        mysql postgres mssql oracle db2 hana \
        clickhouse cockroachdb elasticsearch \
        excel gsheets \
        # Big Data
        drill druid hive impala kylin pinot presto \
        # Others
        cors thumbnails \
    ); \
    pip install "apache-superset[$(join ',' ${SUPERSET_PACKAGES[@]})]==${SUPERSET_VERSION}" \
        "gunicorn[gevent,eventlet]" "flower~=0.9" "authlib~=0.15" "redis~=3.5" "pylibmc~=1.6"; \
    \
    rm -rf /usr/local/cx_Oracle-doc;
77+
78+
# ---- final stage: runtime base plus the artifacts installed by the builder ----
FROM base

# Only /usr/local is taken from the builder stage.
COPY --from=builder /usr/local /usr/local
COPY ./scripts/superset/* /usr/local/bin/
# trino.py is copied with the other scripts, then moved into Superset's
# db_engine_specs package directory (path is tied to Python 3.8).
RUN mv /usr/local/bin/trino.py /usr/local/lib/python3.8/site-packages/superset/db_engine_specs/

# Drop root before the container starts.
USER superset
# 8088 is the port gunicorn binds in the entrypoint; 5555 is presumably Flower.
EXPOSE 8088 5555
ENTRYPOINT [ "/usr/local/bin/docker-entrypoint.sh" ]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
# Container entrypoint: dispatches on the first argument.
#   webserver           wait for the database, run `superset db upgrade`, start gunicorn
#   worker|beat|flower  wait for the broker, start the matching celery sub-command
#   version             print the Superset version
#   anything else       exec the arguments unchanged
set -eo pipefail

case "$1" in
  webserver)
    superset-tools wait database
    superset db upgrade
    # https://docs.gunicorn.org/en/stable/settings.html
    if [[ -n "$GUNICORN_CONFIG_PATH" ]]; then
      GUNICORN_CMD_ARGS="--config=$GUNICORN_CONFIG_PATH ${GUNICORN_CMD_ARGS}"
    fi
    # GUNICORN_CMD_ARGS is left unquoted on purpose so it splits into separate options.
    exec gunicorn --bind=0.0.0.0:8088 \
      ${GUNICORN_CMD_ARGS} \
      "superset.app:create_app()"
    ;;
  worker|beat|flower)
    superset-tools wait broker
    # https://superset.apache.org/docs/installation/async-queries-celery
    # https://docs.celeryproject.org/en/stable/userguide/configuration.html
    # https://docs.celeryproject.org/en/stable/reference/cli.html#celery-worker
    if [[ -n "$CELERY_CONFIG_PATH" ]]; then
      CELERY_CMD_ARGS="--config=$CELERY_CONFIG_PATH ${CELERY_CMD_ARGS}"
    fi
    # CELERY_CMD_ARGS is left unquoted on purpose so it splits into separate options.
    exec celery "$1" ${CELERY_CMD_ARGS} \
      "--app=superset.tasks.celery_app:app"
    ;;
  version)
    superset version
    ;;
  *)
    exec "$@"
    ;;
esac
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import sys
4+
import time
5+
from datetime import datetime
6+
7+
from superset import config
8+
9+
10+
def wait_for_database(retries: int, interval: float):
    """Block until the Superset metadata database answers ``SELECT 1``.

    The database URI is read from ``config.SQLALCHEMY_DATABASE_URI``. Each
    failed attempt is reported on stderr; success is reported on stdout.

    Args:
        retries: Maximum number of connection attempts.
        interval: Seconds to sleep between two attempts.

    Raises:
        SystemExit: When every attempt failed with ``OperationalError``.
    """
    from sqlalchemy import create_engine, sql
    from sqlalchemy.engine.base import Engine
    from sqlalchemy.exc import OperationalError

    engine: Engine = create_engine(config.SQLALCHEMY_DATABASE_URI)
    uri = repr(engine.url)
    try:
        for attempt in range(1, retries + 1):
            try:
                # An explicit connection is returned to the pool as soon as the
                # probe finishes, instead of relying on connectionless execution.
                with engine.connect() as connection:
                    connection.execute(sql.text('SELECT 1')).scalar()
            except OperationalError:
                print(f'[{datetime.now()}] waiting for database at {uri}... Try {attempt}/{retries}', file=sys.stderr)
                # No point in sleeping after the final attempt: we are about to give up.
                if attempt < retries:
                    time.sleep(interval)
            else:
                print(f'[{datetime.now()}] Connect to database at {uri} successfully')
                return
    finally:
        # The engine only exists for this probe; close its pooled connections.
        engine.dispose()

    sys.exit('SQLAlchemy database still unreachable, giving up')
28+
29+
30+
def wait_for_broker(retries: int, interval: float):
    """Block until the Celery broker accepts a connection.

    The broker URL is read from ``config.CELERY_CONFIG.BROKER_URL``. Each
    failed attempt is reported on stderr; success is reported on stdout.

    Args:
        retries: Maximum number of retries handed to kombu.
        interval: Seconds to wait between retries (used as both the start and
            the maximum interval, so the delay stays constant).

    Raises:
        SystemExit: When kombu gives up with ``OperationalError``.
    """
    from kombu.connection import Connection
    from kombu.exceptions import OperationalError

    class RetryLogger:
        """Callable handed to kombu as ``errback``; logs every failed attempt."""

        def __init__(self, uri: str, max_retries: int):
            self.uri = uri
            self.count = 0
            self.max_retries = max_retries

        def __call__(self, exc, interval):
            self.count += 1
            msg = f'[{datetime.now()}] waiting for broker at {self.uri}... Try {self.count}/{self.max_retries}'
            print(msg, file=sys.stderr)

    try:
        # The context manager releases the connection on both the success and
        # the failure path; previously it was never closed.
        with Connection(config.CELERY_CONFIG.BROKER_URL) as conn:
            uri = conn.as_uri()
            conn.ensure_connection(errback=RetryLogger(uri, max_retries=retries),
                                   max_retries=retries, interval_start=interval, interval_max=interval)
            print(f'[{datetime.now()}] Connect to broker at {uri} successfully')
    except OperationalError:
        sys.exit('Celery broker still unreachable, giving up')
53+
54+
55+
def build_argparse():
    """Build the command-line parser for this tool.

    The parser has a single ``wait`` sub-command taking a ``service``
    (``database`` or ``broker``) plus ``--retries`` and ``--interval``. The
    handler for the selected sub-command is stored on the parsed namespace
    as ``func``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    def wait_service(args: argparse.Namespace):
        """Dispatch ``wait <service>`` to the matching wait function."""
        if args.service == 'database':
            wait_for_database(args.retries, args.interval)
        elif args.service == 'broker':
            wait_for_broker(args.retries, args.interval)
        else:
            raise ValueError(f'Unknown service {args.service}')

    parser = argparse.ArgumentParser()
    # A sub-command is mandatory: without it the namespace has no ``func`` and
    # the caller would crash with AttributeError instead of a usage message.
    # ``dest`` is set so argparse can name the missing argument in its error.
    subparsers = parser.add_subparsers(title='sub-commands', dest='command')
    subparsers.required = True

    # wait sub-command
    wait = subparsers.add_parser('wait', help='Waiting for external service start-up')
    wait.add_argument('service', type=str, choices=['database', 'broker'],
                      help='Service that waiting for')
    wait.add_argument('--retries', type=int, default=30, help='Maximum number of times to retry')
    wait.add_argument('--interval', type=float, default=3, help='The number of seconds sleeping for')
    wait.set_defaults(func=wait_service)

    return parser
76+
77+
78+
def main():
    """Parse the command line and invoke the handler stored as ``func``."""
    namespace = build_argparse().parse_args()
    handler = namespace.func
    handler(namespace)
82+
83+
84+
if __name__ == '__main__':
85+
main()

hack/scripts/superset/trino.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from datetime import datetime
2+
from typing import Optional
3+
4+
from superset.db_engine_specs import BaseEngineSpec
5+
from superset.utils import core as utils
6+
7+
8+
class TrinoEngineSpec(BaseEngineSpec):
    """Engine spec for Trino: time-grain SQL templates and datetime literals."""

    engine = "trino"
    engine_name = "Trino"

    # Time-grain key (an ISO 8601 duration, or None for no truncation) mapped
    # to a SQL template; ``{col}`` is kept as a placeholder in each template.
    # pylint: disable=line-too-long
    _time_grain_expressions = {
        None: "{col}",
        "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
        "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
        "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))",
        "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))",
        "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))",
        "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))",
        "P0.25Y": "date_trunc('quarter', CAST({col} AS TIMESTAMP))",
        "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))",
        # Grains with no expression defined here:
        # "1969-12-28T00:00:00Z/P1W",  # Week starting Sunday
        # "1969-12-29T00:00:00Z/P1W",  # Week starting Monday
        # "P1W/1970-01-03T00:00:00Z",  # Week ending Saturday
        # "P1W/1970-01-04T00:00:00Z",  # Week ending Sunday
    }

    @classmethod
    def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
        """Render ``dttm`` as a SQL expression for the given column type.

        The type name is compared case-insensitively. DATE yields a
        ``from_iso8601_date`` call and TIMESTAMP a ``from_iso8601_timestamp``
        call (microsecond precision); any other type yields ``None``.
        """
        temporal_type = target_type.upper()
        if temporal_type == utils.TemporalType.DATE:
            iso_date = dttm.date().isoformat()
            return f"from_iso8601_date('{iso_date}')"
        if temporal_type == utils.TemporalType.TIMESTAMP:
            iso_timestamp = dttm.isoformat(timespec='microseconds')
            return f"from_iso8601_timestamp('{iso_timestamp}')"
        return None

    @classmethod
    def epoch_to_dttm(cls) -> str:
        """Return the SQL template that converts an epoch column to a timestamp."""
        return "from_unixtime({col})"

0 commit comments

Comments
 (0)