diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..39399e4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: generic + +services: + - docker + +script: + - docker build . diff --git a/Dockerfile b/Dockerfile index 73ecd84..97bb934 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,96 +1,37 @@ -FROM ubuntu:14.04 +FROM openjdk:8 -# Set version and github repo which you want to build from -ENV GITHUB_OWNER druid-io -ENV DRUID_VERSION 0.12.1 -ENV ZOOKEEPER_VERSION 3.4.10 +ENV DRUID_VERSION=0.15.1-incubating +ENV ZOOKEEPER_VERSION=3.4.14 -# Java 8 -RUN apt-get update \ - && apt-get install -y software-properties-common \ - && apt-add-repository -y ppa:webupd8team/java \ - && apt-get purge --auto-remove -y software-properties-common \ - && apt-get update \ - && echo oracle-java-8-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections \ - && apt-get install -y oracle-java8-installer oracle-java8-set-default \ - mysql-server \ - supervisor \ - git \ - && apt-get clean \ - && rm -rf /var/cache/oracle-jdk8-installer \ - && rm -rf /var/lib/apt/lists/* +# Get Druid: download the binary release to a file (no pipe, so a failed download fails the build) and unpack it to /opt/druid +RUN curl -fsSL "https://www.apache.org/dyn/closer.cgi?filename=/incubator/druid/$DRUID_VERSION/apache-druid-$DRUID_VERSION-bin.tar.gz&action=download" -o /tmp/druid.tar.gz \ + && tar -xzf /tmp/druid.tar.gz -C /opt \ + && mv "/opt/apache-druid-$DRUID_VERSION" /opt/druid \ + && rm /tmp/druid.tar.gz -# Maven -RUN wget -q -O - http://archive.apache.org/dist/maven/maven-3/3.2.5/binaries/apache-maven-3.2.5-bin.tar.gz | tar -xzf - -C /usr/local \ - && ln -s /usr/local/apache-maven-3.2.5 /usr/local/apache-maven \ - && ln -s /usr/local/apache-maven/bin/mvn /usr/local/bin/mvn +WORKDIR /opt/druid/ # Zookeeper -RUN wget -q -O - http://www.us.apache.org/dist/zookeeper/zookeeper-$ZOOKEEPER_VERSION/zookeeper-$ZOOKEEPER_VERSION.tar.gz | tar -xzf - -C /usr/local \ - && cp /usr/local/zookeeper-$ZOOKEEPER_VERSION/conf/zoo_sample.cfg /usr/local/zookeeper-$ZOOKEEPER_VERSION/conf/zoo.cfg \ - && ln -s 
/usr/local/zookeeper-$ZOOKEEPER_VERSION /usr/local/zookeeper +RUN curl -fsSL "https://www.apache.org/dyn/closer.cgi?filename=/zookeeper/zookeeper-$ZOOKEEPER_VERSION/zookeeper-$ZOOKEEPER_VERSION.tar.gz&action=download" -o /tmp/zookeeper.tar.gz \ + && tar -xzf /tmp/zookeeper.tar.gz && mv "zookeeper-$ZOOKEEPER_VERSION" zk && rm /tmp/zookeeper.tar.gz -# Druid system user -RUN adduser --system --group --no-create-home druid \ - && mkdir -p /var/lib/druid \ - && chown druid:druid /var/lib/druid +COPY config/common.runtime.properties conf/druid/single-server/micro-quickstart/_common/common.runtime.properties -# Druid (from source) -RUN mkdir -p /usr/local/druid/lib - -# trigger rebuild only if branch changed -ADD https://api.github.com/repos/$GITHUB_OWNER/druid/git/refs/heads/$DRUID_VERSION druid-version.json -RUN git clone -q --branch $DRUID_VERSION --depth 1 https://github.com/$GITHUB_OWNER/druid.git /tmp/druid -WORKDIR /tmp/druid - -# package and install Druid locally -# use versions-maven-plugin 2.1 to work around https://jira.codehaus.org/browse/MVERSIONS-285 -RUN mvn -U -B org.codehaus.mojo:versions-maven-plugin:2.1:set -DgenerateBackupPoms=false -DnewVersion=$DRUID_VERSION \ - && mvn -U -B install -DskipTests=true -Dmaven.javadoc.skip=true \ - && cp services/target/druid-services-$DRUID_VERSION-selfcontained.jar /usr/local/druid/lib \ - && cp -r distribution/target/extensions /usr/local/druid/ \ - && cp -r distribution/target/hadoop-dependencies /usr/local/druid/ \ - && apt-get purge --auto-remove -y git \ - && apt-get clean \ - && rm -rf /tmp/* \ - /var/tmp/* \ - /usr/local/apache-maven-3.2.5 \ - /usr/local/apache-maven \ - /root/.m2 - -WORKDIR / - -# Setup metadata store and add sample data -ADD sample-data.sql sample-data.sql -RUN find /var/lib/mysql -type f -exec touch {} \; \ - && /etc/init.d/mysql start \ - && mysql -u root -e "GRANT ALL ON druid.* TO 'druid'@'localhost' IDENTIFIED BY 'diurd'; CREATE database druid CHARACTER SET utf8;" \ - && java -cp /usr/local/druid/lib/druid-services-*-selfcontained.jar \ - 
-Ddruid.extensions.directory=/usr/local/druid/extensions \ - -Ddruid.extensions.loadList=[\"mysql-metadata-storage\"] \ - -Ddruid.metadata.storage.type=mysql \ - io.druid.cli.Main tools metadata-init \ - --connectURI="jdbc:mysql://localhost:3306/druid" \ - --user=druid --password=diurd \ - && mysql -u root druid < sample-data.sql \ - && /etc/init.d/mysql stop - -# Setup supervisord -ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf +RUN bash -c "./bin/start-micro-quickstart &" && \ + ./bin/post-index-task --file quickstart/tutorial/wikipedia-index.json --url http://localhost:8081 --submit-timeout 600 # Expose ports: +# - 8888: HTTP (router) # - 8081: HTTP (coordinator) # - 8082: HTTP (broker) # - 8083: HTTP (historical) # - 8090: HTTP (overlord) -# - 3306: MySQL # - 2181 2888 3888: ZooKeeper +EXPOSE 8888 EXPOSE 8081 EXPOSE 8082 EXPOSE 8083 EXPOSE 8090 -EXPOSE 3306 EXPOSE 2181 2888 3888 -WORKDIR /var/lib/druid -ENTRYPOINT export HOSTIP="$(resolveip -s $HOSTNAME)" && find /var/lib/mysql -type f -exec touch {} \; && exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf +ENTRYPOINT ["./bin/start-micro-quickstart"] diff --git a/README.md b/README.md index 16e2f98..71291b3 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,18 @@ -# Druid Docker Image +[![Build Status](https://travis-ci.org/Fokko/docker-druid.svg?branch=master)](https://travis-ci.org/Fokko/docker-druid) -## Run a simple Druid cluster +# Apache Druid (Incubating) Docker Image -[Install Docker](docker-install.md) +[Install Docker](https://docs.docker.com/install/) -Download and launch the docker image +## Run a simple Apache Druid (Incubating) cluster +Download and launch the docker image: ```sh docker pull druidio/example-cluster -docker run --rm -i -p 3000:8082 -p 3001:8081 druidio/example-cluster +docker run --rm -i -p 8888:8888 druidio/example-cluster ``` -Wait a minute or so for Druid to start up and download the sample. 
- -On OS X - -- List datasources - -``` -curl http://$(docker-machine ip default):3000/druid/v2/datasources -``` - -- access the coordinator console - -``` -open http://$(docker-machine ip default):3001/ -``` - -On Linux - -- List datasources - -``` -curl http://localhost:3000/druid/v2/datasources -``` - -- access the coordinator console at http://localhost:3001/ +Once the cluster has started, you can navigate to [http://localhost:8888](http://localhost:8888). The [Druid router process](https://druid.apache.org/docs/latest/development/router.html), which serves the Druid console, resides at this address. ## Build Druid Docker Image @@ -43,36 +20,29 @@ To build the docker image yourself ```sh git clone https://github.com/druid-io/docker-druid.git -docker build -t example-cluster docker-druid +cd docker-druid +docker build -t docker-druid . +docker run --rm -i -p 8888:8888 docker-druid ``` ## Logging You might want to look into the logs when debugging the Druid processes. This can be done by logging into the container using `docker ps`: ``` -CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -9e73cbfc5612 druidio/example-cluster "/bin/sh -c 'export H" 7 seconds ago Up 6 seconds 2181/tcp, 2888/tcp, 3306/tcp, 3888/tcp, 8083/tcp, 0.0.0.0:3001->8081/tcp, 0.0.0.0:3000->8082/tcp sick_lamport +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +5782c4d4fa40 docker-druid "/bin/sh -c ./bin/st…" 4 seconds ago Up 3 seconds 2181/tcp, 2888/tcp, 3888/tcp, 8081-8083/tcp, 8090/tcp, 0.0.0.0:8888->8888/tcp angry_banach ``` -And attaching to the container using `docker exec -ti 9e73cbfc5612 bash` logs are written to `/tmp/`: +Run the `docker logs` command to fetch the logs. ``` -root@d59a3d4a68c3:/tmp# ls -lah -total 224K -drwxrwxrwt 8 root root 4.0K Jan 18 20:38 . -drwxr-xr-x 61 root root 4.0K Jan 18 20:38 .. 
--rw------- 1 root root 0 Jan 18 20:38 druid-broker-stderr---supervisor-az6WwP.log --rw------- 1 root root 18K Jan 18 20:39 druid-broker-stdout---supervisor-D28zOC.log --rw------- 1 root root 0 Jan 18 20:38 druid-coordinator-stderr---supervisor-RYMt5L.log --rw------- 1 root root 100K Jan 18 21:14 druid-coordinator-stdout---supervisor-Jq4WCi.log --rw------- 1 root root 0 Jan 18 20:38 druid-historical-stderr---supervisor-rmMHmF.log --rw------- 1 root root 18K Jan 18 20:39 druid-historical-stdout---supervisor-AJ0SZX.log --rw------- 1 root root 7.9K Jan 18 21:09 druid-indexing-service-stderr---supervisor-x3YNlo.log --rw------- 1 root root 28K Jan 18 21:14 druid-indexing-service-stdout---supervisor-5uyV7u.log --rw------- 1 root root 155 Jan 18 20:38 mysql-stderr---supervisor-NqN9nY.log --rw------- 1 root root 153 Jan 18 20:38 mysql-stdout---supervisor-23izTf.log --rw------- 1 root root 78 Jan 18 20:38 zookeeper-stderr---supervisor-Rm33j8.log --rw------- 1 root root 7.4K Jan 18 20:39 zookeeper-stdout---supervisor-6AFVOR.log +$ docker logs -f 5782c4d4fa40 +[Wed Aug 7 09:22:41 2019] Running command[zk], logging to[/opt/druid/var/sv/zk.log]: bin/run-zk conf +[Wed Aug 7 09:22:41 2019] Running command[coordinator-overlord], logging to[/opt/druid/var/sv/coordinator-overlord.log]: bin/run-druid coordinator-overlord conf/druid/single-server/micro-quickstart +[Wed Aug 7 09:22:41 2019] Running command[broker], logging to[/opt/druid/var/sv/broker.log]: bin/run-druid broker conf/druid/single-server/micro-quickstart +[Wed Aug 7 09:22:41 2019] Running command[router], logging to[/opt/druid/var/sv/router.log]: bin/run-druid router conf/druid/single-server/micro-quickstart +[Wed Aug 7 09:22:41 2019] Running command[historical], logging to[/opt/druid/var/sv/historical.log]: bin/run-druid historical conf/druid/single-server/micro-quickstart +[Wed Aug 7 09:22:41 2019] Running command[middleManager], logging to[/opt/druid/var/sv/middleManager.log]: bin/run-druid middleManager 
conf/druid/single-server/micro-quickstart ``` ## Troubleshooting @@ -91,4 +61,4 @@ The allocated resources are limited by default to 2 cpu's and 2gb of memory. Alt ``` 2017-01-20T15:59:58,445 INFO [forking-task-runner-0-[index_transactions_2017-01-20T15:59:50.637Z]] io.druid.indexing.overlord.ForkingTaskRunner - Process exited with status[137] for task: index_transactions_2017-01-20T15:59:50.637Z ``` -From the log we observe that the process receives an 137 (=128+9) SIGKILL signal. Because it hit the memory limit, the application is killed instantly. To avoid this you might want to give more resources to the Docker hypervisor under Docker > Preferences. +From the log we observe that the process exited with status 137 (=128+9), i.e. it was killed by a SIGKILL signal. Because it hit the memory limit, the application is killed instantly. To avoid this you might want to give more resources to the Docker hypervisor under Docker > Preferences. \ No newline at end of file diff --git a/config/common.runtime.properties b/config/common.runtime.properties new file mode 100644 index 0000000..bfb761f --- /dev/null +++ b/config/common.runtime.properties @@ -0,0 +1,145 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +# Extensions specified in the load list will be loaded by Druid +# We are using local fs for deep storage - not recommended for production - use S3, HDFS, or NFS instead +# We are using local derby for the metadata store - not recommended for production - use MySQL or Postgres instead + +# If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system. +# If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory. +# More info: http://druid.io/docs/latest/operations/including-extensions.html +druid.extensions.loadList=["druid-hdfs-storage", "druid-kafka-indexing-service", "druid-datasketches", "druid-avro-extensions"] + +# If you have a different version of Hadoop, place your Hadoop client jar files in your hadoop-dependencies directory +# and uncomment the line below to point to your directory. +#druid.extensions.hadoopDependenciesDir=/my/dir/hadoop-dependencies + + +# +# Hostname +# +druid.host=localhost + +# +# Logging +# + +# Log all runtime properties on startup. Disable to avoid logging properties on startup: +druid.startup.logging.logProperties=true + +# +# Zookeeper +# + +druid.zk.service.host=localhost +druid.zk.paths.base=/druid + +# +# Metadata storage +# + +# For Derby server on your Druid Coordinator (only viable in a cluster with a single Coordinator, no fail-over): +druid.metadata.storage.type=derby +druid.metadata.storage.connector.connectURI=jdbc:derby://localhost:1527/var/druid/metadata.db;create=true +druid.metadata.storage.connector.host=localhost +druid.metadata.storage.connector.port=1527 + +# For MySQL (make sure to include the MySQL JDBC driver on the classpath): +#druid.metadata.storage.type=mysql +#druid.metadata.storage.connector.connectURI=jdbc:mysql://db.example.com:3306/druid +#druid.metadata.storage.connector.user=... +#druid.metadata.storage.connector.password=... 
+ +# For PostgreSQL: +#druid.metadata.storage.type=postgresql +#druid.metadata.storage.connector.connectURI=jdbc:postgresql://db.example.com:5432/druid +#druid.metadata.storage.connector.user=... +#druid.metadata.storage.connector.password=... + +# +# Deep storage +# + +# For local disk (only viable in a cluster if this is a network mount): +druid.storage.type=local +druid.storage.storageDirectory=var/druid/segments + +# For HDFS: +#druid.storage.type=hdfs +#druid.storage.storageDirectory=/druid/segments + +# For S3: +#druid.storage.type=s3 +#druid.storage.bucket=your-bucket +#druid.storage.baseKey=druid/segments +#druid.s3.accessKey=... +#druid.s3.secretKey=... + +# +# Indexing service logs +# + +# For local disk (only viable in a cluster if this is a network mount): +druid.indexer.logs.type=file +druid.indexer.logs.directory=var/druid/indexing-logs + +# For HDFS: +#druid.indexer.logs.type=hdfs +#druid.indexer.logs.directory=/druid/indexing-logs + +# For S3: +#druid.indexer.logs.type=s3 +#druid.indexer.logs.s3Bucket=your-bucket +#druid.indexer.logs.s3Prefix=druid/indexing-logs + +# +# Service discovery +# + +druid.selectors.indexing.serviceName=druid/overlord +druid.selectors.coordinator.serviceName=druid/coordinator + +# +# Monitoring +# + +druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] +druid.emitter=noop +druid.emitter.logging.logLevel=info + +# Storage type of double columns +# omitting this will lead to doubles being indexed as floats at the storage layer + +druid.indexing.doubleStorage=double + +# +# Security +# +druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.metadata.storage.connector.password"] + + +# +# SQL +# +druid.sql.enable=true + +# +# Lookups +# +druid.lookup.enableLookupSyncOnStartup=false diff --git a/docker-install.md b/docker-install.md deleted file mode 100644 index 529a5b5..0000000 --- a/docker-install.md +++ /dev/null @@ -1,39 +0,0 @@ -# Tutorial - -## Install Docker (Mac) - -[Install 
Homebrew](http://brew.sh/#install) - -```sh -ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" -``` - -[Install Cask](http://caskroom.io/) - -```sh -brew install caskroom/cask/brew-cask -``` - -Install Virtualbox - -```sh -brew update -brew cask install virtualbox -``` - -Docker Machine - -```sh -brew update -brew install docker-machine docker -docker-machine create --driver virtualbox default -eval "$(docker-machine env default)" -``` - -Is it working? - -``` -docker run hello-world -``` - -[build druid-docker](README.md) diff --git a/sample-data.sql b/sample-data.sql deleted file mode 100644 index 2826a31..0000000 --- a/sample-data.sql +++ /dev/null @@ -1,2 +0,0 @@ -INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}'); - diff --git a/supervisord.conf b/supervisord.conf deleted file mode 100644 index aeeff7b..0000000 --- a/supervisord.conf +++ /dev/null @@ -1,77 +0,0 @@ -[supervisord] -nodaemon=true -loglevel=debug - 
-[program:zookeeper] -command=/usr/local/zookeeper/bin/zkServer.sh start-foreground -user=daemon -priority=0 - -[program:mysql] -command=/usr/bin/pidproxy /var/run/mysqld/mysqld.pid /usr/bin/mysqld_safe -user=mysql -priority=0 - -[program:druid-coordinator] -user=druid -command=java - -server - -Xmx1g - -Duser.timezone=UTC - -Dfile.encoding=UTF-8 - -Ddruid.host=%(ENV_HOSTIP)s - -Ddruid.extensions.loadList=[\"mysql-metadata-storage\"] - -Ddruid.extensions.directory=/usr/local/druid/extensions - -Ddruid.extensions.hadoopDependenciesDir=/usr/local/druid/hadoop-dependencies - -Ddruid.metadata.storage.type=mysql - -Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://localhost:3306/druid - -Ddruid.metadata.storage.connector.user=druid - -Ddruid.metadata.storage.connector.password=diurd - -Ddruid.coordinator.asOverlord.enabled=true - -Ddruid.coordinator.asOverlord.overlordService=druid/overlord - -Ddruid.indexer.fork.property.druid.processing.numThreads=1 - -Ddruid.indexer.storage.type=metadata - -Ddruid.indexer.queue.startDelay=PT0M - -Ddruid.indexer.runner.javaOpts="-server -Xmx1g -XX:MaxDirectMemorySize=2147483648" - -Ddruid.processing.buffer.sizeBytes=536870912 - -Ddruid.coordinator.startDelay=PT5S - -cp /usr/local/druid/lib/* - io.druid.cli.Main server coordinator -redirect_stderr=true -priority=100 - -[program:druid-historical] -user=druid -command=java - -server - -Xmx1g - -Duser.timezone=UTC - -Dfile.encoding=UTF-8 - -Ddruid.host=%(ENV_HOSTIP)s - -Ddruid.extensions.loadList=[\"druid-s3-extensions\"] - -Ddruid.extensions.directory=/usr/local/druid/extensions - -Ddruid.extensions.hadoopDependenciesDir=/usr/local/druid/hadoop-dependencies - -Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ - -Ddruid.s3.secretKey=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 - -Ddruid.computation.buffer.size=67108864 - -Ddruid.segmentCache.locations="[{\"path\":\"/var/tmp/druid/indexCache\",\"maxSize\":5000000000}]" - -Ddruid.server.maxSize=5000000000 - -cp /usr/local/druid/lib/* - 
io.druid.cli.Main server historical -redirect_stderr=true -priority=100 - -[program:druid-broker] -user=druid -command=java - -server - -Xmx1g - -Duser.timezone=UTC - -Dfile.encoding=UTF-8 - -Ddruid.host=%(ENV_HOSTIP)s - -Ddruid.computation.buffer.size=67108864 - -Ddruid.broker.cache.sizeInBytes=33554432 - -cp /usr/local/druid/lib/* - io.druid.cli.Main server broker -redirect_stderr=true -priority=100