diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..116fa336 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +charts/.DS_Store diff --git a/README.md b/README.md index b21a8209..32e7c51a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# Dremio Container Tools +# Dremio Cloud Tools -This repository contains: +This repository contains tools and utilities to deploy Dremio to cloud environments: -* Tools to build [Dremio Docker images](images/dremio-oss). -* Example [helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Dockerfile](images/dremio-oss) to build Dremio Docker images. +* [Helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Azure Resource Manager (ARM) template](azure/arm-templates) to deploy to Azure. These are currently *experimental* items and should be evaluated and extended based on individual needs. diff --git a/aws/cloudformation/README.md b/aws/cloudformation/README.md new file mode 100644 index 00000000..dc837269 --- /dev/null +++ b/aws/cloudformation/README.md @@ -0,0 +1,34 @@ + +# Deploying Dremio to AWS + +_Note:_ To try on AWS, you should have: +* Permission to create Security Groups +* An AWS key pair created +* (Optional) A VPC and subnet created if you want to install to a non-default VPC + +Try it out [![AWS Cloudformation](https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png)](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/new?templateURL=https://s3-us-west-2.amazonaws.com/aws-cloudformation.dremio.com/dremio_cf.yaml&stackName=myDremio) + +This deploys a Dremio cluster on EC2 instances. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No. 
of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | m5.2xlarge | r5d.4xlarge | 1 | +| Small | m5.2xlarge | r5d.4xlarge | 5 | +| Medium | m5.4xlarge | r5d.4xlarge | 10 | +| Large | m5.4xlarge | r5d.4xlarge | 25 | +| X-Large | m5.4xlarge | r5d.4xlarge | 50 | + +Make sure you are in the AWS region you are planning to deploy your cluster in. + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Stack name |Name of the stack. | +| Cluster Size |Pick a size based on your needs.| +| Deploy to VPC |VPC to deploy the cluster into.| +| Deploy to Subnet |Subnet to deploy the cluster into. Must be in the selected VPC.| +| Dremio download URL | (Optional) Publicly accessible URL to a Dremio installation RPM. Leave empty to install the latest Dremio CE release. | +| AWS keypair | AWS key pair to use to SSH to the VMs. The SSH username for the VMs is `centos` (has sudo privileges). You can SSH into the machines to change configuration, review logs, etc. | + +Once the deployment is successful, you will find the URL to the Dremio UI in the output section of the deployment. diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml new file mode 100644 index 00000000..f30be27b --- /dev/null +++ b/aws/cloudformation/dremio_cf.yaml @@ -0,0 +1,404 @@ +--- +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Setup a Dremio cluster.' +Parameters: + keyName: + Type: AWS::EC2::KeyPair::KeyName + AllowedPattern: ".+" + ConstraintDescription: Must select an existing EC2 KeyPair + Description: "AWS key pair to use to SSH to the VMs. SSH username for the VMs are centos (has sudo privilege). SSH into machines for changing configuration, reviewing logs, etc." + clusterSize: + Type: String + Description: "The type and number of machines are chosen based on the size selected."
+ AllowedValues: + - "X-Small--1-executor" + - "Small--5-executors" + - "Medium--10-executors" + - "Large--25-executors" + - "X-Large--50-executors" + Default: "Small--5-executors" + useVPC: + Type: AWS::EC2::VPC::Id + Description: "VPC to deploy the cluster into." + useSubnet: + Type: AWS::EC2::Subnet::Id + Description: "Subnet to deploy the cluster into. Must be in the selected VPC." + dremioDownloadURL: + Type: String + Description: "(Optional) HTTP or HTTPS URL to a Dremio RPM. Leave empty to install the latest Dremio CE release." + Default: "" +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - + Label: + default: Dremio Cluster + # List only parameters that are declared in the Parameters section of this template. + Parameters: + - clusterSize + - useVPC + - useSubnet + - dremioDownloadURL + - + Label: + default: AWS resource configuration + Parameters: + - keyName + ParameterLabels: + keyName: + default: "AWS keypair" + clusterSize: + default: "Cluster size" + useSubnet: + default: "Deploy to Subnet" + useVPC: + default: "Deploy to VPC" + dremioDownloadURL: + default: "Dremio download URL" +Mappings: + Custom: + Variables: + URL: https://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm + ClusterSizes: + X-Small--1-executor: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 10 + coordinatorMaxMemory: 28672 + executorInstanceType: r5d.4xlarge + executorCount: 1 + executorDiskSize: 10 + executorMaxMemory: 122880 + Small--5-executors: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 50 + coordinatorMaxMemory: 28672 + executorInstanceType: r5d.4xlarge + executorCount: 5 + executorDiskSize: 50 + executorMaxMemory: 122880 + Medium--10-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 + executorInstanceType: r5d.4xlarge + executorCount: 10 + executorDiskSize: 100 + executorMaxMemory: 122880 + Large--25-executors: + coordinatorInstanceType:
m5.4xlarge + coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 + executorInstanceType: r5d.4xlarge + executorCount: 25 + executorDiskSize: 100 + executorMaxMemory: 122880 + X-Large--50-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 + executorInstanceType: r5d.4xlarge + executorCount: 50 + executorDiskSize: 100 + executorMaxMemory: 122880 + RegionMap: + # Centos 7 Images + us-east-1: # N Virginia + AMI: ami-02eac2c0129f6376b + us-east-2: # Ohio + AMI: ami-0f2b4fc905b0bd1f1 + us-west-1: # California + AMI: ami-074e2d6769f445be5 + us-west-2: # Oregon + AMI: ami-01ed306a12b7d1c96 + ca-central-1: # Québec + AMI: ami-033e6106180a626d0 + eu-central-1: # Frankfurt + AMI: ami-04cf43aca3e6f3de3 + eu-west-1: # Ireland + AMI: ami-0ff760d16d9497662 + eu-west-2: # London + AMI: ami-0eab3a90fc693af19 + ap-southeast-1: # Singapore + AMI: ami-0b4dd9d65556cac22 + ap-southeast-2: # Sydney + AMI: ami-08bd00d7713a39e7d + ap-south-1 : # Mumbai + AMI: ami-02e60be79e78fef21 + ap-northeast-1: # Tokyo + AMI: ami-045f38c93733dd48d + ap-northeast-2: # Seoul + AMI: ami-06cf2a72dadf92410 + sa-east-1: # São Paulo + AMI: ami-0b8d86d4bf91850af + SubnetConfig: + VPC: + CIDR: 10.0.0.0/16 + Public: + CIDR: 10.0.0.0/24 +Conditions: + CreateVPC: !Equals [!Ref useSubnet, ""] +Resources: + VPC: + Condition: CreateVPC + Type: AWS::EC2::VPC + Properties: + EnableDnsSupport: 'true' + EnableDnsHostnames: 'true' + CidrBlock: !FindInMap [SubnetConfig, VPC, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "net"]] + + PublicSubnet: + Condition: CreateVPC + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + CidrBlock: !FindInMap [SubnetConfig, Public, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + - Key: Network + Value: Public + + InternetGateway: + Condition: CreateVPC + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref 
"AWS::StackName", "ig"]] + + VPCGatewayAttachment: + Condition: CreateVPC + Type: AWS::EC2::VPCGatewayAttachment + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + + PublicRouteTable: + Condition: CreateVPC + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + + PublicSubnetRouteTableAssociation: + Condition: CreateVPC + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + SubnetId: !Ref PublicSubnet + RouteTableId: !Ref PublicRouteTable + + PublicRoute: + Condition: CreateVPC + Type: AWS::EC2::Route + DependsOn: VPCGatewayAttachment + Properties: + RouteTableId: !Ref PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway + + DremioSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: "Dremio Access" + VpcId: !If [CreateVPC, !Ref VPC, !Ref useVPC] + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: '9047' + ToPort: '9047' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '31010' + ToPort: '31010' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '22' + ToPort: '22' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '80' + ToPort: '80' + CidrIp: 0.0.0.0/0 + AvailabilityWaitHandle: + Type: AWS::CloudFormation::WaitConditionHandle + AvailabilityWaitCondition: + Type: AWS::CloudFormation::WaitCondition + DependsOn: DremioMaster + Properties: + Handle: !Ref "AvailabilityWaitHandle" + Timeout: "600" + + DremioSecurityGroupSelfIngress: + Type: AWS::EC2::SecurityGroupIngress + Properties: + GroupId: !Ref DremioSecurityGroup + IpProtocol: -1 + SourceSecurityGroupId: !Ref DremioSecurityGroup + + DremioMaster: + Type: AWS::EC2::Instance + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "DremioMaster"]] + ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", AMI] + KeyName: !Ref keyName + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, 
coordinatorInstanceType] + NetworkInterfaces: + - DeleteOnTermination: "true" + AssociatePublicIpAddress: "true" + DeviceIndex: 0 + SubnetId: !If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet] + GroupSet: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData: + Fn::Base64: !Sub + - | + #!/bin/bash -x + statusFile=/tmp/statusfile + + if [ ! -d /opt/dremio ]; then + url=${dremioDownloadURL} + [ -z $url ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + if [ $? != 0 ]; then + echo "{ \"Status\" : \"FAILURE\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Failed\", \"Reason\" : \"Unable to download Dremio\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + exit 1 + fi + fi + + DREMIO_HOME=/opt/dremio + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env + + sed -i -e "s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE + + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + + until curl -Iks http://localhost:9047; do + echo waiting for website availability + sleep 2 + done + echo "{ \"Status\" : \"SUCCESS\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Ready\", \"Reason\" : \"Website Available\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + - DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorMaxMemory] + + DremioExecutorLC: + Type: AWS::AutoScaling::LaunchConfiguration + DependsOn: DremioMaster + Properties: + AssociatePublicIpAddress: true + #EbsOptimized: true + ImageId: + Fn::FindInMap: + - RegionMap + - !Ref 
AWS::Region + - AMI + InstanceMonitoring: true + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, executorInstanceType] + KeyName: !Ref keyName + SecurityGroups: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData: + Fn::Base64: !Sub + - | + #!/bin/bash -x + + if [ ! -d /opt/dremio ]; then + url=${dremioDownloadURL} + # Quote the test operand so an empty or space-containing URL is handled correctly + [ -z "$url" ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + fi + + # Setup ephemeral disk for spill - this is based on executors are r5d class machines + SPILL_DIR=/var/ephemeral/spill + NVME_SPILL=nvme1n1 + + mkdir -p $SPILL_DIR + file -s /dev/$NVME_SPILL | grep "/dev/$NVME_SPILL: data" && mkfs -t xfs /dev/$NVME_SPILL && \ + UUID=$(blkid | grep $NVME_SPILL | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $SPILL_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chown dremio:dremio $SPILL_DIR + + # Setup ephemeral disk for C3 - this is based on executors are r5d class machines + CLOUDCACHE_DIR=/var/ephemeral/cloudcache + NVME_CLOUDCACHE=nvme2n1 + + mkdir -p $CLOUDCACHE_DIR + file -s /dev/$NVME_CLOUDCACHE | grep "/dev/$NVME_CLOUDCACHE: data" && mkfs -t xfs /dev/$NVME_CLOUDCACHE && \ + UUID=$(blkid | grep $NVME_CLOUDCACHE | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $CLOUDCACHE_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chown dremio:dremio $CLOUDCACHE_DIR + + # DREMIO_HOME is used further down to copy the bundled dremio.service unit file + DREMIO_HOME=/opt/dremio + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env + + sed -i -e "s/coordinator.master.enabled: true/coordinator.master.enabled: false/" \ + -e "s/coordinator.enabled: true/coordinator.enabled: false/" \ + -e "/local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE + # Append the executor settings (everything up to the EOF marker) to dremio.conf + cat <<EOF >> $DREMIO_CONFIG_FILE + zookeeper: "${ZK}:2181" +
services.executor.cache.path.db: "$CLOUDCACHE_DIR" + services.executor.cache.path.fs: ["$CLOUDCACHE_DIR"] + services.executor.cache.pctquota.db: 10 + services.executor.cache.pctquota.fs: [100] + EOF + + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + - ZK: !GetAtt DremioMaster.PrivateIp + DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, executorMaxMemory] + + DremioExecutorASG: + Type: AWS::AutoScaling::AutoScalingGroup + DependsOn: DremioExecutorLC + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", DremioExecutor]] + PropagateAtLaunch: true + ResourceType: "auto-scaling-group" + ResourceId: !Ref "AWS::StackName" + LaunchConfigurationName: !Ref DremioExecutorLC + VPCZoneIdentifier: [!If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet]] + DesiredCapacity: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MaxSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MinSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + +Outputs: + DremioUI: + Description: Dremio UI. + Value: !Join [ "", ["http://", !GetAtt DremioMaster.PublicIp, ":9047"]] diff --git a/azure/arm-templates/README.md b/azure/arm-templates/README.md new file mode 100644 index 00000000..59094555 --- /dev/null +++ b/azure/arm-templates/README.md @@ -0,0 +1,62 @@ + +# Deploying Dremio to Azure + +You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) + +This deploys a Dremio cluster on Azure VMs. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. 
The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No. of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | Standard_D4_v3 | Standard_E16s_v3 | 1 | +| Small | Standard_D4_v3 | Standard_E16s_v3 | 5 | +| Medium | Standard_D8_v3 | Standard_E16s_v3 | 10 | +| Large | Standard_D8_v3 | Standard_E16s_v3 | 25 | +| X-Large | Standard_D8_v3 | Standard_E16s_v3 | 50 | + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Subscription |Azure subscription where the cluster should be deployed. | +| Resource Group |The Azure Resource group where the cluster should be deployed. You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | +| Location |The Azure location where the cluster resources will be deployed. | +| Cluster Name |A name for your cluster.| +| Cluster Size |Pick a size based on your needs.| +| SSH Username |The username that can be used to login to your nodes.| +| Authentication Type |Password or Key based authentication for ssh.| +| Password or SSH Public Key |The password or ssh public key | +| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| +| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | +| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. 
+ +The deployment resources are: +``` +┌───────────────────────────┐ +│ WebUI on 9047 │ +│ JDBC/ODBC client on 31010 │ +└─────────────┬─────────────┘ + │ +┌────────────────────────────┼─────────────────────────────────────┐ +│ VirtualNetwork │ │ +│ ┌──────────────────────────▼───────────────────────────────────┐ │ +│ │ Subnet ┌──────────────────────────┐ ┌────────────────┐ │ │ +│ │ │ LoadBalancer │ │ Security Group │ │ │ +│ │ └──────────────────┬───────┘ │Allow access to │ │ │ +│ │ │ │22, 9047, 31010 │ │ │ +│ │ ┌───────────────────┘ └────────────────┘ │ │ +│ │ │ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────┐ ┌───────────────────┐ │ │ +│ │ │Master Coordinator │ │ Executor ├┐ │ │ +│ │ │ (Azure VM) │───────────▶│(Azure VM Scaleset)│├─┐ │ │ +│ │ └───────────────────┘ └┬──────────────────┘│ │ │ │ +│ │ ┌───────────────────┐ └─┬─────────────────┘ │ │ │ +│ │ │ Dremio Metadata │ └───────────────────┘ │ │ +│ │ │ (Azure Disk) │ │ │ +│ │ └───────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────┘ +``` diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json new file mode 100644 index 00000000..89593a09 --- /dev/null +++ b/azure/arm-templates/azuredeploy.json @@ -0,0 +1,187 @@ + +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "clusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Dremio cluster name in Azure." + } + }, + "clusterSize": { + "type": "string", + "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors)", "Large (25 executors)", "X-Large (50 executors)"], + "metadata": { + "description": "The type and number of machines are chosen based on the size selected." 
+ } + }, + "SSHUsername": { + "type": "string", + "defaultValue": "azuser", + "metadata": { + "description": "SSH username for the virtual machines. (Can be used to SSH into machines for changing configuration, reviewing logs, etc.)" + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "SSHPublicKey" + ], + "metadata": { + "description": "Type of authentication to use for SSH." + } + }, + "PasswordOrSSHPublicKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 8 characters with at least 1 upper case letter, 1 lower case letter and 1 number." + } + }, + "useExistingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - resource id of existing subnet to deploy to; the subnet needs to be in the same region as the cluster. If empty, a new virtual network and subnet will be created." + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio." 
+ } + }, + "dremioBinary": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - publicly accessible URL to a Dremio installation rpm" + } + } + }, + "variables": { + "baseURI": "https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/nested/", + "apiVersion": "2018-05-01", + "shortName": "[take(resourceGroup().name, 40)]", + "rgName": "[resourceGroup().name]", + "location": "[resourceGroup().location]", + "stateRgName": "[resourceGroup().name]", + "dataDiskName": "[concat(parameters('clusterName'), '-master-data-disk')]", + "dataDiskId": "[concat(subscription().id, '/resourceGroups/', variables('stateRgName'), '/providers/Microsoft.Compute/disks/', variables('dataDiskName'))]", + "clusterSizes": { + "X-Small (1 executor)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 1, + "metadataDiskSize": 10 + }, + "Small (5 executors)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 5, + "metadataDiskSize": 50 + }, + "Medium (10 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 10, + "metadataDiskSize": 100 + }, + "Large (25 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 25, + "metadataDiskSize": 100 + }, + "X-Large (50 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 50, + "metadataDiskSize": 100 + } + } + }, + "resources": [ + { + "apiVersion": "2018-02-01", + "name": "pid-1f30d282-b6d2-5dc6-9630-85533cc11b98", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": 
"https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-state-deployment')]", + "resourceGroup": "[variables('stateRgName')]", + "dependsOn": [ + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioState.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "dataDiskName": {"value": "[variables('dataDiskName')]"}, + "dataDiskSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].metadataDiskSize]"}, + "virtualNetworkNewOrExisting": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), 'new', 'existing')]"}, + "existingSubnet": {"value": "[parameters('useExistingSubnet')]"} + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-compute-deployment')]", + "resourceGroup": "[variables('rgName')]", + "dependsOn": [ + "[concat(variables('shortName'), '-state-deployment')]" + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioCluster.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "executorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorCount]"}, + "executorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorVmSize]"}, + "coordinatorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorCount]"}, + "coordinatorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorVmSize]"}, + "dremioDownloadURL": {"value": 
"[parameters('dremioBinary')]"}, + "dataDiskId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.dataDiskId.value]"}, + "sshUsername": {"value": "[parameters('SSHUsername')]"}, + "sshPasswordOrKey": {"value": "[parameters('PasswordOrSSHPublicKey')]"}, + "subnetId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value]"}, + "loadBalancerId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.loadBalancerId.value]"}, + "nsgId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.nsgId.value]"}, + "usePrivateIP": {"value": "[parameters('usePrivateIP')]"} + } + } + } + ], + "outputs": { + "dremioUi": { + "type": "string", + "value": "[concat('http://', reference(concat(variables('shortName'), '-compute-deployment')).outputs.dremioHost.value, ':9047')]" + } + } +} diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json new file mode 100644 index 00000000..ab8e54a6 --- /dev/null +++ b/azure/arm-templates/nested/dremioCluster.json @@ -0,0 +1,506 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "sshUsername": { + "type": "string", + "metadata": { + "description": "SSH username for the virtual machines." + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "sshPublicKey" + ], + "metadata": { + "description": "Type of authentication to use on the virtual machines." + } + }, + "sshPasswordOrKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh key for the virtual machines." 
+ } + }, + "dataDiskId": { + "type": "string" + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." + } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-master-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat('dremio-master-', uniqueString(resourceGroup().id, parameters('dremioClusterName')))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Standard", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU of the public ip address" + } + }, + "coordinatorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the coordinator virtual machines." + } + }, + "coordinatorCount": { + "type": "int", + "defaultValue": 0, + "metadata": { + "description": "Number of coordinators in the cluster" + } + }, + "executorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the executor virtual machines."
+ } + }, + "executorCount": { + "type": "int", + "defaultValue": 3, + "metadata": { + "description": "Number of executors in the cluster" + } + }, + "dremioDownloadURL": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) URL to download Dremio rpm. By default, it will install the latest CE version." + } + }, + "loadBalancerId": { + "type": "string", + "metadata": { + "description": "Loadbalancer fronting the coordinators" + } + }, + "nsgId": { + "type": "string", + "metadata": { + "description": "The security group required - ports 9047, 31010 and 22(ssh) should be allowed" + } + }, + "subnetId": { + "type": "string", + "metadata": { + "description": "The subnet in which the Dremio cluster is to be deployed" + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select to use the private ip address of the subnet for Dremio access." + } + }, + "storageAccountName": { + "type": "string", + "defaultValue": "[concat('dremiometa',uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "Name of the storage account" + } + }, + "storageAccountType": { + "type": "string", + "defaultValue": "Standard_LRS", + "metadata": { + "description": "Storage account type" + } + }, + "storageKind": { + "type": "string", + "defaultValue": "StorageV2", + "metadata": { + "description": "Storage account kind" + } + }, + "storageAccessTier": { + "type": "string", + "defaultValue": "Hot", + "metadata": { + "description": "Storage access tier" + } + } + }, + "variables": { + "computeApiVersion": "2018-06-01", + "storageApiVersion": "2018-07-01", + "location": "[resourceGroup().location]", + "nicName": "[concat(parameters('dremioClusterName'), '-nic')]", + "dremioImage": { + "publisher": "OpenLogic", + "offer": "CentOS", + "sku": "7.5", + "version": "latest" + }, + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "ssh": { + "publicKeys": [ + { + "path": 
"[concat('/home/', parameters('sshUsername'), '/.ssh/authorized_keys')]", + "keyData": "[parameters('sshPasswordOrKey')]" + } + ] + } + }, + "publicIpAddressId": { + "id": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]" + }, + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "Dynamic", + "upgradeMode": "Manual", + "namingInfix": "[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "baseURI": "https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/scripts/", + "scriptFileName": "setupDremio.sh", + "scriptURL": "[concat(variables('baseURI'), variables('scriptFileName'))]", + "install": false + }, + "resources": [ + { + "name": "[parameters('storageAccountName')]", + "type": "Microsoft.Storage/storageAccounts", + "apiVersion": "[variables('storageApiVersion')]", + "location": "[variables('location')]", + "properties": { + "accessTier": "[parameters('storageAccessTier')]", + "supportsHttpsTrafficOnly": true, + "isHnsEnabled": true + }, + "sku": { + "name": "[parameters('storageAccountType')]" + }, + "kind": "[parameters('storageKind')]" + }, + { + "condition": "[and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + "domainNameLabel": "[parameters('publicIpDns')]", + "reverseFqdn": "[concat(parameters('publicIpDns'), '.', variables('location'), '.cloudapp.azure.com')]" + } + } + }, + { + "apiVersion": 
"[variables('computeApiVersion')]", + "type": "Microsoft.Network/networkInterfaces", + "name": "[variables('nicName')]", + "location": "[variables('location')]", + "dependsOn": [ + "[parameters('publicIpName')]" + ], + "properties": { + "ipConfigurations": [ + { + "name": "ipconfig-master", + "properties": { + "privateIPAllocationMethod": "[variables('ipAllocationMethod')]", + "subnet": { + "id": "[parameters('subnetId')]" + }, + "publicIPAddress": "[if(and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new')), variables('publicIpAddressId') , json('null'))]" + } + } + ], + "networkSecurityGroup": { + "id": "[parameters('nsgId')]" + } + } + }, + { + "apiVersion": "[variables('computeApiVersion')]", + "type": "Microsoft.Compute/virtualMachines", + "name": "[concat(parameters('dremioClusterName'), '-master')]", + "location": "[variables('location')]", + "dependsOn": [ + "[variables('nicName')]", + "[parameters('storageAccountName')]" + ], + "properties": { + "hardwareProfile": { + "vmSize": "[parameters('coordinatorVmSize')]" + }, + "osProfile": { + "computerName": "[parameters('dremioClusterName')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]", + "linuxConfiguration": "[if(equals(parameters('authenticationType'), 'password'), json('null'), variables('linuxConfiguration'))]" + }, + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "caching": "ReadWrite", + "createOption": "FromImage" + }, + "dataDisks": [ + { + "lun": 0, + "managedDisk": { + "id": "[parameters('dataDiskId')]" + }, + "caching": "ReadWrite", + "createOption": "Attach" + } + ] + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('nicName'))]" + } + ] + } + }, + "resources": [ + { + "type": "extensions", + "name": "configScript", + "apiVersion": "[variables('computeApiVersion')]", + "location": 
"[variables('location')]", + "dependsOn": [ + "[concat(parameters('dremioClusterName'), '-master')]" + ], + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ] + }, + "protectedSettings": { + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" + } + } + } + ] + }, + { + "name": "[concat(parameters('dremioClusterName'), '-coordinators')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "condition": "[variables('install')]", + "sku": { + "name": "[parameters('coordinatorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('coordinatorCount')]" + }, + "properties": { + "overprovision": "true", + "upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "createOption": "FromImage", + "caching": "ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": 
"[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-coordinators')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + }, + "loadBalancerBackendAddressPools": [ + { + "id": "[concat(parameters('loadBalancerId'), '/backendAddressPools/', variables('bePoolName'))]" + } + ] + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + }, + { + "name": "[concat(parameters('dremioClusterName'), '-executors')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "sku": { + "name": "[parameters('executorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('executorCount')]" + }, + "properties": { + "overprovision": "true", + "upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "createOption": "FromImage", + "caching": 
"ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": "[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-executors')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + } + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + } + ], + "outputs": { + "dremioHost": { + "type": "string", + "value": "[if(parameters('usePrivateIP'), first(reference(variables('nicName')).ipConfigurations).properties.privateIPAddress, reference(parameters('publicIpName'), variables('computeApiVersion')).dnsSettings.fqdn)]" + } + } +} diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json new file mode 100644 index 00000000..47359950 --- /dev/null +++ 
b/azure/arm-templates/nested/dremioState.json @@ -0,0 +1,401 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "dataDiskName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-data-disk')]", + "metadata": { + "description": "Name for the Dremio Master Data Disk" + } + }, + "dataDiskSize": { + "type": "int", + "defaultValue": 20, + "metadata": { + "description": "Size of the Dremio Master Data Disk" + } + }, + "virtualNetworkNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new virtual network should be provisioned." + } + }, + "addressPrefixes": { + "type": "array", + "defaultValue": [ + "10.0.0.0/16" + ], + "metadata": { + "description": "Address prefix of the virtual network" + } + }, + "subnetName": { + "type": "string", + "defaultValue": "default", + "metadata": { + "description": "Name of the subnet" + } + }, + "subnetPrefix": { + "type": "string", + "defaultValue": "10.0.0.0/24", + "metadata": { + "description": "Subnet prefix of the virtual network" + } + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." 
+ } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-webui-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-dremio-', uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Basic", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU of the public ip address" + } + }, + "externalLoadBalancer": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Create an external load balancer" + } + }, + "existingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) - existing subnet" + } + } + }, + "variables": { + "computeApiVersion": "2018-06-01", + "location": "[resourceGroup().location]", + "virtualNetworkName": "[concat(parameters('dremioClusterName'), '-vnet')]", + "publicIpAddressId": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]", + "networkSecurityGroupName": "dremio-nsg", + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "upgradeMode": "Manual", + "namingInfix":
"[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "loadBalancerName": "[concat(variables('namingInfix'), '-lb')]", + "lbID": "[resourceId('Microsoft.Network/loadBalancers',variables('loadBalancerName'))]", + "natPoolName": "[concat(variables('namingInfix'), 'natpool')]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "natStartPort": 50000, + "natEndPort": 50119, + "natBackendPort": 9047, + "frontEndIPConfigId": "[concat(variables('lbID'),'/frontendIPConfigurations/loadBalancerFrontEnd')]", + "backendAddressPoolId": "[concat(variables('lbID'),'/backendAddressPools/', variables('bePoolName'))]", + "externallb": "[or(parameters('externalLoadBalancer'), equals(trim(parameters('existingSubnet')), ''))]", + "lbfrontEndIPConfig": "[if(variables('externallb'), variables('externallbFronEndIpConfig'), variables('internallbFrontEndIpConfig'))]", + "externallbFronEndIpConfig": { + "publicIPAddress": { + "id": "[variables('publicIpAddressId')]" + } + }, + "internallbFrontEndIpConfig": { + "subnet": { + "privateIPAllocationMethod": "Dynamic", + "id": "[parameters('existingSubnet')]" + } + }, + "networkAclsBypass": "AzureServices", + "networkAclsDefaultAction": "Deny", + "install": false + }, + "resources": [ + { + "type": "Microsoft.Compute/disks", + "sku": { + "name": "StandardSSD_LRS", + "tier": "Standard" + }, + "name": "[parameters('dataDiskName')]", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "scale": null, + "properties": { + "creationData": { + "createOption": "Empty" + }, + "diskSizeGB": "[parameters('dataDiskSize')]" + } + }, + { + "condition": "[equals(parameters('virtualNetworkNewOrExisting'), 'new')]", + "type": "Microsoft.Network/virtualNetworks", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[variables('virtualNetworkName')]", + "location": "[variables('location')]", + "properties": { + "addressSpace": { + 
"addressPrefixes": "[parameters('addressPrefixes')]" + }, + "subnets": [ + { + "name": "[parameters('subnetName')]", + "properties": { + "addressPrefix": "[parameters('subnetPrefix')]" + } + } + ] + } + }, + { + "name": "[variables('networkSecurityGroupName')]", + "type": "Microsoft.Network/networkSecurityGroups", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "properties": { + "securityRules": [ + { + "name": "default-allow-ssh", + "properties": { + "priority": 1000, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "22", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-ui", + "properties": { + "priority": 100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "9047", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-client", + "properties": { + "priority": 110, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "31010", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-for-letsencrypt", + "properties": { + "priority": 1100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "80", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + } + ] + } + }, + { + "condition": "[and(variables('install'), variables('externallb'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + 
"domainNameLabel": "[parameters('publicIpDns')]" + } + } + }, + { + "condition": "[variables('install')]", + "type": "Microsoft.Network/loadBalancers", + "name": "[variables('loadBalancerName')]", + "location": "[variables('location')]", + "apiVersion": "[variables('computeApiVersion')]", + "sku": { + "name": "Standard" + }, + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName'))]" + ], + "properties": { + "frontendIPConfigurations": [ + { + "name": "LoadBalancerFrontEnd", + "properties": "[variables('lbfrontEndIPConfig')]" + } + ], + "backendAddressPools": [ + { + "name": "[variables('bePoolName')]" + } + ], + "loadBalancingRules": [ + { + "name": "dremio-ui", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 9047, + "backendPort": 9047, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), '/probes/dremio-ui')]" + } + } + }, + { + "name": "dremio-client", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 31010, + "backendPort": 31010, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), '/probes/dremio-ui')]" + } + } + } + ], + "probes": [ + { + "name": "dremio-ui", + "properties": { + "protocol": "Tcp", + "port": 9047, + "intervalInSeconds": 15, + "numberOfProbes": 2 + } + } + ], + "inboundNatRules": [], + "outboundRules": [], + 
"inboundNatPools": [ + { + "name": "[variables('natPoolName')]", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigID')]" + }, + "protocol": "tcp", + "idleTimeoutInMinutes": 4, + "enableFloatingIP": false, + "enableTcpReset": false, + "frontendPortRangeStart": "[variables('natStartPort')]", + "frontendPortRangeEnd": "[variables('natEndPort')]", + "backendPort": "[variables('natBackendPort')]" + } + } + ] + } + } + ], + "outputs": { + "dataDiskId": { + "type": "string", + "value": "[resourceId('Microsoft.Compute/disks/', parameters('dataDiskName'))]" + }, + "subnetId": { + "type": "string", + "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), parameters('existingSubnet'))]" + }, + "loadBalancerId": { + "type": "string", + "value": "[if(variables('install'), resourceId('Microsoft.Network/loadBalancers/', variables('loadBalancerName')), '')]" + }, + "nsgId": { + "type": "string", + "value": "[resourceId('Microsoft.Network/networkSecurityGroups/', variables('networkSecurityGroupName'))]" + }, + "dremioUIAddress": { + "type": "string", + "value": "[if(variables('install'), if(variables('externallb'), reference(concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName')), variables('computeApiVersion')).dnsSettings.fqdn, first(reference(variables('loadBalancerName')).frontendIPConfigurations).properties.privateIPAddress), '')]" + } + } +} diff --git a/azure/arm-templates/scripts/setupDremio.sh b/azure/arm-templates/scripts/setupDremio.sh new file mode 100644 index 00000000..dc4fd877 --- /dev/null +++ b/azure/arm-templates/scripts/setupDremio.sh @@ -0,0 +1,199 @@ +#/bin/bash -e + +[ -z $DOWNLOAD_URL ] && DOWNLOAD_URL=http://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm +if [ ! 
-f /opt/dremio/bin/dremio ]; then + command -v yum >/dev/null 2>&1 || { echo >&2 "This script works only on Centos or Red Hat. Aborting."; exit 1; } + yum install -y java-1.8.0-openjdk-devel $DOWNLOAD_URL +fi + +service=$1 +if [ -z "$service" ]; then + echo "Require the service to start - master, coordinator or executor" + exit 1 +fi +storage_account=$2 +access_key=$3 + +if [ -n "$storage_account" -a -n "$access_key" ]; then + use_azure_storage=1 +fi + +# In Azure, /dev/sdb is ephemeral storage mapped to /mnt/resource. +# Additional disks are mounted after that... +DISK_NAME=/dev/sdc +DISK_PART=${DISK_NAME}1 +DREMIO_HOME=/opt/dremio +DREMIO_CONFIG_DIR=/etc/dremio +DREMIO_CONFIG_FILE=$DREMIO_CONFIG_DIR/dremio.conf +DREMIO_DATA_DIR=/var/lib/dremio +# Azure Linux VMs have ephemeral/temporary disk +# always mounted on /mnt/resource/dremio +SPILL_DIR=/mnt/resource/dremio + +function partition_disk { + parted $DISK_NAME mklabel msdos + parted -s $DISK_NAME mkpart primary ext4 0% 100% + mkfs -t ext4 $DISK_PART +} + +if [ "$service" == "master" ]; then + lsblk -no FSTYPE $DISK_NAME | grep ext4 || partition_disk + mount $DISK_PART $DREMIO_DATA_DIR + chown dremio:dremio $DREMIO_DATA_DIR + echo "$DISK_PART $DREMIO_DATA_DIR ext4 defaults 0 0" >> /etc/fstab +else + if [ -n "$use_azure_storage" ]; then # zookeeper host is $4 only when storage account and key were both passed + zookeeper=$4 + else + zookeeper=$2 + fi + if [ -z "$zookeeper" ]; then + echo "Non-master node requires zookeeper host" + exit 2 + fi +fi + +function setup_spill { + chmod +w /etc/sysconfig/dremio + cat >> /etc/sysconfig/dremio < /dev/null; do echo waiting for dremio master; sleep 2; done; + configure_dremio_dist + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +function setup_executor { + configure_dremio_dist + setup_spill + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled:
false/; \ + s/coordinator.enabled: true/coordinator.enabled: false/; \ + /local:/a \ \ spilling: [\"$SPILL_DIR/spill\"]" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +function storage_create_action { + resource=$1 + resource_type=$2 + blob_store_url="dfs.core.windows.net" + authorization="SharedKey" + request_method="PUT" + request_date=$(TZ=GMT date "+%a, %d %h %Y %H:%M:%S %Z") + storage_service_version="2018-11-09" + # HTTP Request headers + x_ms_date_h="x-ms-date:$request_date" + x_ms_version_h="x-ms-version:$storage_service_version" + content_length_h="Content-Length: 0" + # Build the signature string + canonicalized_headers="${x_ms_date_h}\n${x_ms_version_h}" + canonicalized_resource="/${storage_account}/${resource}\nresource:${resource_type}" + string_to_sign="${request_method}\n\n\n\n\n\n\n\n\n\n\n\n${canonicalized_headers}\n${canonicalized_resource}" + # Decode the Base64 encoded access key, convert to Hex. + decoded_hex_key="$(echo -n $access_key | base64 -d -w0 | xxd -p -c256)" + # Create the HMAC signature for the Authorization header + signature=$(printf "$string_to_sign" | openssl dgst -sha256 -mac HMAC -macopt "hexkey:$decoded_hex_key" -binary | base64 -w0) + authorization_header="Authorization: $authorization $storage_account:$signature" + curl \ + -X $request_method \ + -H "$content_length_h" \ + -H "$x_ms_date_h" \ + -H "$x_ms_version_h" \ + -H "$authorization_header" \ + "https://${storage_account}.${blob_store_url}/${resource}?resource=${resource_type}" + return $? +} + +function write_coresite_xml { +cat > $DREMIO_CONFIG_DIR/core-site.xml < + + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + $storage_account + + + dremio.azure.key + The shared access key for the storage account. 
+ $access_key + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + +EOF +} + +function update_dremio_config { +cat >> $DREMIO_CONFIG_FILE < 31010:32390/TCP,9047:30670/TCP 1h ``` -where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: -http://localhost:30670 +Where there is no external IP and the Dremio master is running on node +"localhost", you can get to Dremio UI using: +http://localhost:30670 #### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. + +The port 31010 is used for ODBC and JDBC connections. You can look up +service dremio-client in kubernetes to find the host to use for ODBC +or JDBC connections. Depending on your kubernetes cluster supporting +serviceType LoadBalancer, you will use the load balancer external-ip +or the node on which a coordinator is running. ```bash kubectl get services dremio-client @@ -61,46 +102,82 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. +For example, in the above output, the service is exposed on an +external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC +connections. #### Viewing logs -Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. 
-``` -kubectl logs -``` -You can also tail the logs using the -f parameter. -``` -kubectl logs -f -``` + +Logs are written to the container's console. All the logs - +server.log, server.out, server.gc and access.log - are written into +the console simultaneously. You can view the logs using kubectl. ``` +kubectl logs ``` You can also tail the logs using the +-f parameter. ``` kubectl logs -f ``` #### Scale by adding additional Coordinators or Executors (optional) -Get the name of the helm release. In the example below, the release name is plundering-alpaca. + +Get the name of the helm release. In the example below, the release +name is plundering-alpaca: + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators +Add additional coordinators: + ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors +Add additional executors: + ```bash helm upgrade dremio --set executor.count=5 ``` You can also scale down the same way. +### Running offline dremio-admin commands + +The administration commands restore, cleanup and set-password in +dremio-admin need to be run when the Dremio cluster is not +running. So, before running these commands, you need to shut down the +Dremio cluster. Use the helm delete command to delete the helm +release. (Kubernetes does not delete the persistent store volumes +when you delete statefulset pods and when you install the cluster +again using helm, the existing persistent store will be used and you +will get your Dremio cluster running again.) + +After the Dremio cluster is shut down, start the dremio-admin pod using: + +```bash +helm install --wait dremio --set DremioAdmin=true +``` +Once the pod is running, you can connect to the pod using: + +```bash +kubectl exec -it dremio-admin -- bash +``` +Now, you have a bash shell from where you can run the dremio-admin commands.
+ +Once you are done, you can delete the helm release for the +dremio-admin and start your Dremio cluster. + #### Upgrading Dremio -You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. + +You should attempt upgrade when no queries are running on the +cluster. Update the Dremio image tag in your values.yaml file. E.g: + ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release name is plundering-alpaca. +Get the name of the helm release. In the example below, the release +name is plundering-alpaca. + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -108,20 +185,45 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: + ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the new image. You can +Existing pods will be terminated and new pods will be created with the +new image. You can + monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is upgraded. +Once all the pods are restarted and running, your Dremio cluster is +upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. +Dremio configuration files used by the deployment are in the config +directory. 
These files are propagated to all the pods in the +cluster. Updating the configuration and upgrading the helm release - +just like doing an upgrade - would refresh all the pods with the new +configuration. [Dremio +documentation](https://docs.dremio.com/deployment/README-config.html) +covers the configuration capabilities in Dremio. + +If you need to add a core-site.xml, you can add the file to the config +directory and it will be propagated to all the pods on install or +upgrade of the deployment. + +#### Important Changes + +2019-09-19 (v0.1.0): BREAKING CHANGE. + + Dremio versions before 4.0.0 are no longer supported by this Helm + chart. Dremio image specifier was split into an imageName and + imageTag parts to follow best practices. "dist" value in + dremio.conf moved to cloud storage where possible (otherwise + defaults to pdfs) -- this will lose any previously extant + reflections materialisations, user uploads, scratch files, etc. + Also added Cloud Cache support (new in Dremio 4.0). Please see + values.yaml for details on this new configuration. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml new file mode 100644 index 00000000..3283eb32 --- /dev/null +++ b/charts/dremio/config/core-site.xml @@ -0,0 +1,93 @@ + + + +{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} + + {{- if eq .Values.distStorage.type "aws" }} + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. 
+ {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if eq .Values.distStorage.type "azure" }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if eq .Values.distStorage.type "azureStorage" }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. 
Value: True/False + True + + {{- end }} +{{- end }} + diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 0aa7b656..6dbe4f9a 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,20 +15,68 @@ # paths: { - # the local path for dremio to store data. + # Local path for dremio to store data. local: ${DREMIO_HOME}"/data" - - # the distributed path Dremio data including job results, downloads, uploads, etc - #dist: "pdfs://"${paths.local}"/pdfs" + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne .Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/results" + {{- if eq .Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" + {{- else if eq .Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" + {{- else if eq .Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}{{ required "Path required" .Values.distStorage.azureStorage.path }}" + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" + {{- end }} } services: { # The services running are controlled via command line options passed in # while starting the services via kubernetes. Updating the three values - # below will not impact what services are running. + # below will not impact what services are running. # coordinator.enabled: true, # coordinator.master.enabled: true, # executor.enabled: true # # Other service parameters can be customized via this file. + + # Cloud Cache is supported in Dremio 4.0.0+. 
+ {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} + executor: { + cache: { + path.db: "/var/lib/dremio", + path.fs: ["/var/lib/dremio"], + pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, + pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] + } + } + {{- end }} +} + +{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible + # for distributed storage + dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} } +{{- end }} + +{{- if .Values.tls.ui.enabled }} +services.coordinator.web.ssl.enabled: true +services.coordinator.web.ssl.auto-certificate.enabled: false + +services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" +{{- end }} + +{{- if .Values.tls.client.enabled }} +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in +# Dremio Enterprise Edition. 
+services.coordinator.client-endpoint.ssl.enabled: true +services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false +services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" +{{- end }} diff --git a/charts/dremio/config/logback-access.xml b/charts/dremio/config/logback-access.xml index c0f2ed28..a00ae338 100644 --- a/charts/dremio/config/logback-access.xml +++ b/charts/dremio/config/logback-access.xml @@ -24,8 +24,11 @@ ${dremio.log.path}/access.log - ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.%i.log.gz 30 + + 100MB + diff --git a/charts/dremio/config/logback-admin.xml b/charts/dremio/config/logback-admin.xml new file mode 100644 index 00000000..b393d02b --- /dev/null +++ b/charts/dremio/config/logback-admin.xml @@ -0,0 +1,67 @@ + + + + + + %msg%n%ex{0}%n + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + + %date{ISO8601} [%thread] %-5level %logger{30} - %msg%n + + + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + ${dremio.admin.log.path} + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + + + diff --git a/charts/dremio/config/logback.xml b/charts/dremio/config/logback.xml index 8999c3bc..0ab3528b 100644 --- a/charts/dremio/config/logback.xml +++ b/charts/dremio/config/logback.xml @@ -30,7 +30,22 @@ ${dremio.log.path}/server.log - ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/metadata_refresh.log + + ${dremio.log.path}/archive/metadata_refresh.%d{yyyy-MM-dd}.log.gz 30 @@ -42,8 +57,11 @@ ${dremio.log.path}/json/server.json - ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -56,15 +74,18 @@ message - + ${dremio.log.path}/queries.json - 
${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -87,6 +108,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + ${dremio.log.path}/hive.deprecated.function.warning.log + + ${dremio.log.path}/archive/hive.deprecated.function.warning.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml new file mode 100644 index 00000000..feda9d9b --- /dev/null +++ b/charts/dremio/templates/dremio-admin.yaml @@ -0,0 +1,40 @@ +{{ if .Values.DremioAdmin }} +# dremio-admin pod is used to run offline commands like +# clean, restore or set-password against the Dremio cluster. +# The Dremio cluster should be shutdown before attempting to +# create the dremio-admin pod. +# You connect to the pod (kubectl exec -it dremio-admin -- bash), +# go to /opt/dremio/bin and run dremio-admin commands as documented. 
+apiVersion: v1 +kind: Pod +metadata: + name: dremio-admin +spec: + containers: + - name: dremio-admin + image: {{.Values.image}}:{{.Values.imageTag}} + imagePullPolicy: IfNotPresent + stdin: true + tty: true + resources: + requests: + memory: {{.Values.coordinator.memory}}M + cpu: {{.Values.coordinator.cpu}} + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["sleep", "infinity"] + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.imagePullSecrets }} + {{- end}} + volumes: + - name: dremio-master-volume + persistentVolumeClaim: + claimName: dremio-master-volume-dremio-master-0 + - name: dremio-config + configMap: + name: dremio-config +{{ end }} diff --git a/charts/dremio/templates/dremio-configmap.yaml b/charts/dremio/templates/dremio-configmap.yaml index 33fb22bd..442ca1dd 100644 --- a/charts/dremio/templates/dremio-configmap.yaml +++ b/charts/dremio/templates/dremio-configmap.yaml @@ -3,4 +3,4 @@ kind: ConfigMap metadata: name: dremio-config data: - {{- (.Files.Glob "config/*").AsConfig | nindent 2 }} + {{- tpl (.Files.Glob "config/*").AsConfig . 
| nindent 2 }} diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index bfede34c..3089c1ad 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -7,11 +8,6 @@ spec: replicas: {{.Values.coordinator.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-coordinator @@ -24,9 +20,15 @@ spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -35,6 +37,10 @@ spec: volumeMounts: - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -62,11 +68,80 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}}:{{.Values.imageTag}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - 
"/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}}:{{.Values.imageTag}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} {{- end}} +{{ end }} diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 8facdd0c..150e1fe1 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -7,11 +8,6 @@ spec: replicas: {{.Values.executor.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-executor @@ -24,9 +20,15 @@ 
spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-executor - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -59,9 +61,10 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to dremio uid/gid + # since we're mounting a separate volume, reset permission to + # dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -91,3 +94,4 @@ spec: resources: requests: storage: {{.Values.executor.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 1930ba06..51619193 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -27,9 +28,15 @@ spec: - dremio-master topologyKey: "kubernetes.io/hostname" terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -40,6 +47,10 @@ spec: mountPath: /opt/dremio/data - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + 
mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -63,6 +74,15 @@ spec: name: client - containerPort: 45678 name: server + readinessProbe: + httpGet: + path: / + {{- if .Values.tls.ui.enabled }} + scheme: HTTPS + {{- end }} + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 initContainers: - name: start-only-one-master image: busybox @@ -72,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -84,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -92,10 +112,78 @@ spec: command: ["/opt/dremio/bin/dremio-admin"] args: - "upgrade" + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}}:{{.Values.imageTag}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}}:{{.Values.imageTag}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + 
- "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} @@ -111,3 +199,4 @@ spec: resources: requests: storage: {{.Values.coordinator.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-service-client.yaml b/charts/dremio/templates/dremio-service-client.yaml index 8149368e..a1906738 100644 --- a/charts/dremio/templates/dremio-service-client.yaml +++ b/charts/dremio/templates/dremio-service-client.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -38,3 +39,4 @@ spec: clusterIP: None selector: role: dremio-cluster-pod +{{ end }} diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index a272234c..4ad5a9c6 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -48,7 +49,7 @@ spec: matchLabels: app: zk serviceName: zk-hs - replicas: 1 + replicas: {{.Values.zookeeper.count}} updateStrategy: type: RollingUpdate podManagementPolicy: Parallel @@ -67,14 +68,20 @@ spec: values: - zk topologyKey: "kubernetes.io/hostname" + {{- if 
.Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: kubernetes-zookeeper imagePullPolicy: Always image: "k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10" resources: requests: - memory: "1Gi" - cpu: "0.5" + memory: "{{.Values.zookeeper.memory}}M" + cpu: "{{.Values.zookeeper.cpu}}" ports: - containerPort: 2181 name: client @@ -86,7 +93,7 @@ spec: - sh - -c - "start-zookeeper \ - --servers=1 \ + --servers={{.Values.zookeeper.count}} \ --data_dir=/var/lib/zookeeper/data \ --data_log_dir=/var/lib/zookeeper/data/log \ --conf_dir=/opt/zookeeper/conf \ @@ -132,4 +139,5 @@ spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 10Gi + storage: {{.Values.zookeeper.volumeSize}} +{{ end }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index ad01d742..925b88a3 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,14 +1,15 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss:latest +image: dremio/dremio-oss +imageTag: latest # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. coordinator: - memory: 16384 - cpu: 8 + memory: 122880 + cpu: 15 # This count is for slave coordinators only. # The chart will always create one master coordinator - you are # not required to have more than one master coordinator. 
@@ -19,10 +20,45 @@ coordinator: port: 31010 volumeSize: 100Gi executor: - memory: 16384 - cpu: 4 + memory: 122880 + cpu: 15 count: 3 volumeSize: 100Gi + cloudCache: + # Requires Dremio version 4.0.0 or later + enabled: true + quota: + # Percentage of the disk space for the running Kubernetes node + # that can be used for Cloud Cache files. + fs_pct: 70 + # Percentage of that space that can be used for the internal + # Cloud Cache database. + db_pct: 70 + # Percentage of that space that can be used for caching + # materialised reflections. This is an upper-bound, not a + # reservation. + cache_pct: 100 +zookeeper: + memory: 1024 + cpu: 0.5 + count: 3 + volumeSize: 10Gi + +# To create a TLS secret, use the following command: +# kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} +tls: + ui: + # To enable TLS for the web UI, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. + enabled: false + secret: dremio-tls-secret-ui + client: + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. + enabled: false + secret: dremio-tls-secret-client # If your Kubernetes cluster does not support LoadBalancer, # comment out the line below for the helm chart to succeed or add @@ -44,6 +80,38 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name here. -# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod -#imagePullSecrets=secretname +# the credentials in a kubernetes secret and provide the secret name +# here.
For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +#imagePullSecrets: secretname + +# Target pods to nodes based on labels set on the nodes. For more +# information, see +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +#nodeSelector: +# key: value + +# Control where uploaded files are stored. See +# https://docs.dremio.com/deployment/distributed-storage.html for more +# information +distStorage: + # Valid values are local, aws, azure or azureStorage. aws and azure + # choice requires additional configuration data. + type: "local" + aws: # S3 + bucketName: "Your_AWS_bucket_name" + path: "/" + accessKey: "Your_AWS_Access_Key" + secret: "Your_AWS_Secret" + azure: # ADLS gen1 + datalakeStoreName: "Your_Azure_DataLake_Storage_name" + path: "/" + applicationId: "Your_Azure_Application_Id" + secret: "Your_Azure_Secret" + oauth2EndPoint: "Azure_OAuth2_Endpoint" + azureStorage: # AzureStorage gen2v2 + accountName: "Azure_storage_v2_account_name" + accessKey: "Access_key_for_the_storage_account" + + filesystem: "Filesystem_in_storage_account" + path: "/" diff --git a/charts/dremio_v2/Chart.yaml b/charts/dremio_v2/Chart.yaml new file mode 100644 index 00000000..6362cdb8 --- /dev/null +++ b/charts/dremio_v2/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: "v1" +name: "dremio" +version: "2.0.0" +keywords: + - dremio + - data +home: https://www.dremio.com/ diff --git a/charts/dremio_v2/README.md b/charts/dremio_v2/README.md new file mode 100644 index 00000000..982ca260 --- /dev/null +++ b/charts/dremio_v2/README.md @@ -0,0 +1,106 @@ +# Installing Dremio on Kubernetes + +You can follow these instructions to install Dremio in a Kubernetes cluster provisioned through a cloud provider or running in an on-premises environment. Supported cloud providers are Amazon Elastic Kubernetes Service (EKS), Google Kubernetes Engine (GKE), and Microsoft Azure Kubernetes Service (AKS). 
+ +If you are upgrading from the previous Helm chart for Dremio, please see the [Migrating Helm Chart Versions](./docs/setup/Migrating-Helm-Chart-Versions.md) documentation. + +## Prerequisites + +* Ensure that you have an existing Kubernetes cluster. +* Ensure that Helm 3 is set up on a local machine. +* Ensure that a local kubectl is configured to access your Kubernetes cluster. + +## Procedure + +1. Download [the `dremio-cloud-tools` repository](https://github.com/dremio/dremio-cloud-tools/tree/master/charts/dremio_v2). +1. In a terminal window, change to the `dremio-cloud-tools/charts/dremio_v2/` directory. +1. Review the default values in the file `values.yaml`, which configures the Dremio installation. If you want to override any of these values, create a file with the `.yaml` extension in this directory (for example, `values.local.yaml`), copy into this file the keys for which you want to set non-default values, and then set the values in the file. Making changes in this file allows you to quickly update to the latest version of the chart by copying the file across Helm chart updates. Refer to "[`values.yaml` Reference](./docs/Values-Reference.md)" for details about the settings. +1. Review the document "[Important Setup Considerations](./docs/setup/Important-Setup-Considerations.md)" and make any of the listed changes to the values in your `values.local.yaml` file that you think are necessary for your environment. +1. Install the Helm Chart by running one of these commands from the `charts` directory: + * If you are overriding any of the default values that are in the `values.yaml` file, run this command: + + ```bash + $ helm install ${RELEASE_NAME} dremio_v2 -f ${VALUES_FILE} + ``` + where `${RELEASE_NAME}` is the name you choose for the Helm release and `${VALUES_FILE}` is the name of the file that you are using to override values.
+ * If you are not overriding any of the values in the `values.yaml` file, run this command: + ```bash + $ helm install ${RELEASE_NAME} dremio_v2 + ``` + + If the installation takes longer than a few minutes to complete, you can check the status of the installation by using the following command: + + ```bash + $ kubectl get pods + ``` + + If a pod remains in **Pending** state for more than a few minutes, run the following command to view its status to check for issues, such as insufficient resources for scheduling: + + ```bash + $ kubectl describe pods + ``` + + If the events at the bottom of the output mention insufficient CPU or memory, either adjust the values in your `values.local.yaml` and restart the process or add more resources to your Kubernetes cluster. + + When all of the pods are in the **Ready** state, the installation is complete. + +## What to do next + +Now that you've installed the Dremio Helm chart, you can get the HTTP addresses for connecting to Dremio's UI, connecting to Dremio from BI tools via JDBC/ODBC, and for connecting to Dremio from BI tools via Apache Arrow Flight. + +### Getting the HTTP address for connecting to the Dremio UI + +Run the following command to use the `service dremio-client` in Kubernetes to find the host for the Dremio UI: + +``` +$ kubectl get services dremio-client +``` + +* If the value in the `TYPE` column of the output is `LoadBalancer`, access the Dremio UI through the address in the `EXTERNAL-IP` column and port 9047. +For example, in the output below, the value under the `EXTERNAL-IP` column is 8.8.8.8. Therefore, you can get to the Dremio UI via port 9047 on that address: http://8.8.8.8:9047 + ``` + $ kubectl get services dremio-client + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP 2d + ``` + + If you want to change the exposed port on the load balancer, change the value of the setting `coordinator.web.port` in the file `values.local.yaml`.
+* If the value in the TYPE column of the output is `NodePort`, access the Dremio UI through http://localhost:30670. + +### Getting the HTTP address for using ODBC or JDBC to connect from BI tools to Dremio + +Run the following command to use the `service dremio-client` in Kubernetes to find the host for JDBC/ODBC connections: +``` +$ kubectl get services dremio-client +``` +* If the value in the TYPE column of the output is `LoadBalancer`, access Dremio using JDBC/ODBC through the address in the `EXTERNAL-IP` column and port 31010. + For example, in the output below, the value under the `EXTERNAL-IP` column is 8.8.8.8. Therefore, you can connect to Dremio using JDBC/ODBC via port 31010 on that address: 8.8.8.8:31010 + ``` + $ kubectl get services dremio-client + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP 2d + ``` + If you want to change the exposed port on the load balancer, change the value of the setting `coordinator.client.port` in the file `values.local.yaml`. + +* If the value in the TYPE column of the output is `NodePort`, access Dremio using JDBC/ODBC through http://localhost:32390. + +### Getting the HTTP address for using Apache Arrow Flight to connect from BI tools to Dremio + +Run the following command to use the `service dremio-client` in Kubernetes to find the host for Apache Arrow Flight connections: + +``` +$ kubectl get services dremio-client +``` + +* If the value in the TYPE column of the output is `LoadBalancer`, access Dremio using Flight through the address in the `EXTERNAL-IP` column and port 32010. + + For example, in the output below, the value under the `EXTERNAL-IP` column is 8.8.8.8.
Therefore, you can connect to Dremio using Flight via port 32010 on that address: 8.8.8.8:32010 + + ``` + $ kubectl get services dremio-client + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE + dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP 2d + ``` + + If you want to change the exposed port on the load balancer, change the value of the setting `coordinator.flight.port` in the file `values.local.yaml`. +* If the value in the TYPE column of the output is `NodePort`, access Dremio using Flight through http://localhost:31357. diff --git a/charts/dremio_v2/config/core-site.xml b/charts/dremio_v2/config/core-site.xml new file mode 100644 index 00000000..709d07bd --- /dev/null +++ b/charts/dremio_v2/config/core-site.xml @@ -0,0 +1,179 @@ + + + +{{- if and $.Values.distStorage.type (ne $.Values.distStorage.type "local") -}} + {{- if eq $.Values.distStorage.type "gcp" -}} + + + dremio.gcs.whitelisted.buckets + GCS bucket to use for distributed storage + {{ required "GCS Bucket name required" $.Values.distStorage.gcp.bucketName }} + + {{- if eq $.Values.distStorage.gcp.authentication "serviceAccountKeys" -}} + + fs.dremiogcs.impl + The FileSystem implementation.
Must be set to com.dremio.plugins.gcs.GoogleBucketFileSystem + com.dremio.plugins.gcs.GoogleBucketFileSystem + + + dremio.gcs.use_keyfile + Use the key file + true + + + dremio.gcs.projectId + GCP Project ID + {{ required "GCP Project ID required" $.Values.distStorage.gcp.credentials.projectId }} + + + dremio.gcs.clientId + GCP Service Account Client ID + {{ required "GCP Service Account Client ID required" $.Values.distStorage.gcp.credentials.clientId }} + + + dremio.gcs.clientEmail + GCP Service Account Client Email + {{ required "GCP Service Account Client Email required" $.Values.distStorage.gcp.credentials.clientEmail }} + + + dremio.gcs.privateKeyId + GCP Service Account Private Key ID + {{ required "GCP Service Account Private Key ID required" $.Values.distStorage.gcp.credentials.privateKeyId }} + + + dremio.gcs.privateKey + GCP Service Account Private Key + {{ required "GCP Service Account Private Key required" $.Values.distStorage.gcp.credentials.privateKey }} + + {{- else if eq $.Values.distStorage.gcp.authentication "auto" -}} + + fs.dremiogcs.impl + The FileSystem implementation. Must be set to com.dremio.plugins.gcs.GoogleBucketFileSystem + com.dremio.plugins.gcs.GoogleBucketFileSystem + + + dremio.gcs.use_keyfile + Do not use the key file + false + + {{- else -}} + {{ fail "Unrecognized GCP authentication mode." }} + {{- end -}} + {{- if $.Values.distStorage.gcp.extraProperties -}} + {{- $.Values.distStorage.gcp.extraProperties | nindent 4 }} + {{- end -}} + {{- end -}} + {{- if eq $.Values.distStorage.type "aws" }} + + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + {{ if eq $.Values.distStorage.aws.authentication "accessKeySecret" -}} + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. 
+ {{ required "AWS access key required" $.Values.distStorage.aws.credentials.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" $.Values.distStorage.aws.credentials.secret}} + + {{ else if eq $.Values.distStorage.aws.authentication "metadata" -}} + + fs.s3a.aws.credentials.provider + The credential provider type. + com.amazonaws.auth.InstanceProfileCredentialsProvider + + {{ else if eq $.Values.distStorage.aws.authentication "awsProfile" -}} + + com.dremio.awsProfile + AWS Profile to use. + {{ required "AWS profile required" $.Values.distStorage.aws.credentials.awsProfileName}} + + + fs.s3a.aws.credentials.provider + The credential provider type. + com.dremio.plugins.s3.store.AWSProfileCredentialsProviderV1 + + {{- else -}} + {{ fail "Unrecognized AWS authentication mode." }} + {{- end -}} + {{- if $.Values.distStorage.aws.extraProperties -}} + {{- $.Values.distStorage.aws.extraProperties | nindent 4 }} + {{- end -}} + {{- end -}} + {{- if eq $.Values.distStorage.type "azure" }} + + + + fs.dremioAdl.impl + com.dremio.plugins.adl.store.DremioAdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory. + {{ required "Azure application ID required" $.Values.distStorage.azure.credentials.applicationId }} + + + dfs.adls.oauth2.credential + Generated password value for the registered application. + {{ required "Azure secret value required" $.Values.distStorage.azure.credentials.secret }} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. 
+ {{ required "Azure OAuth2 token endpoint required" $.Values.distStorage.azure.credentials.oauth2Endpoint }} + + + dfs.adls.oauth2.access.token.provider.type + ClientCredential + + + fs.adl.impl.disable.cache + false + + {{- if $.Values.distStorage.azure.extraProperties -}} + {{- $.Values.distStorage.azure.extraProperties | nindent 4 }} + {{- end -}} + {{- end }} + {{- if eq $.Values.distStorage.type "azureStorage" }} + + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" $.Values.distStorage.azureStorage.accountName }} + + + dremio.azure.key + The shared access key for the storage account. + {{ required "Shared access key required" $.Values.distStorage.azureStorage.credentials.accessKey }} + + + dremio.azure.mode + The storage account type. + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. + True + + {{- if $.Values.distStorage.azureStorage.extraProperties -}} + {{- $.Values.distStorage.azureStorage.extraProperties | nindent 4 }} + {{- end -}} + {{- end }} +{{- end}} + \ No newline at end of file diff --git a/charts/dremio_v2/config/dremio-env b/charts/dremio_v2/config/dremio-env new file mode 100644 index 00000000..6f6f8c09 --- /dev/null +++ b/charts/dremio_v2/config/dremio-env @@ -0,0 +1,101 @@ +# +# Copyright (C) 2017-2018 Dremio Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Dremio environment variables used by Dremio daemon +# + +# +# Directory where Dremio logs are written +# Default to $DREMIO_HOME/log +# +#DREMIO_LOG_DIR=${DREMIO_HOME}/log + +# +# Send logs to console and not to log files. The DREMIO_LOG_DIR is ignored if set. +# +#DREMIO_LOG_TO_CONSOLE=1 + +# +# Directory where Dremio pidfiles are written +# Default to $DREMIO_HOME/run +# +#DREMIO_PID_DIR=${DREMIO_HOME}/run + +# +# Max total memory size (in MB) for the Dremio process +# +# If not set, default to using max heap and max direct. +# +# If both max heap and max direct are set, this is not used +# If one is set, the other is calculated as difference +# of max memory and the one that is set. +# +#DREMIO_MAX_MEMORY_SIZE_MB= + +# +# Max heap memory size (in MB) for the Dremio process +# +# Default to 4096 for server +# +#DREMIO_MAX_HEAP_MEMORY_SIZE_MB=4096 + +# +# Max direct memory size (in MB) for the Dremio process +# +# Default to 8192 for server +# +#DREMIO_MAX_DIRECT_MEMORY_SIZE_MB=8192 + +# +# Max permanent generation memory size (in MB) for the Dremio process +# (Only used for Java 7) +# +# Default to 512 for server +# +#DREMIO_MAX_PERMGEN_MEMORY_SIZE_MB=512 + +# +# Garbage collection logging is enabled by default. Set the following +# parameter to "no" to disable garbage collection logging. +# +#DREMIO_GC_LOGS_ENABLED="yes" + +# +# The scheduling priority for the server +# +# Default to 0 +# +# DREMIO_NICENESS=0 +# + +# +# Number of seconds after which the server is killed forcibly if it hasn't stopped +# +# Default to 120 +# +#DREMIO_STOP_TIMEOUT=120 + +# Extra Java options - shared between dremio and dremio-admin commands +# +#DREMIO_JAVA_EXTRA_OPTS= + +# Extra Java options - client only (dremio-admin command) +# +#DREMIO_JAVA_CLIENT_EXTRA_OPTS= + +# Warning: Do not set DREMIO_JAVA_SERVER_EXTRA_OPTS in dremio-env.
+# Please see the values.yaml extraStartParams for setting additional options for Dremio process startup. diff --git a/charts/dremio_v2/config/dremio.conf b/charts/dremio_v2/config/dremio.conf new file mode 100644 index 00000000..0c68168f --- /dev/null +++ b/charts/dremio_v2/config/dremio.conf @@ -0,0 +1,96 @@ +# +# Copyright (C) 2017-2018 Dremio Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +paths: { + # Local path for Dremio to store data. + local: ${DREMIO_HOME}"/data" + # Distributed path for Dremio data, including job results, downloads, + # uploads, etc. + {{- if ne $.Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/results" + {{- if eq $.Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucket name required" $.Values.distStorage.aws.bucketName }}{{ required "AWS bucket path required" $.Values.distStorage.aws.path }}" + {{- else if eq $.Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" $.Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Azure Datalake path required" $.Values.distStorage.azure.path }}" + {{- else if eq $.Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" $.Values.distStorage.azureStorage.filesystem }}{{ required "Azure Storage path required" $.Values.distStorage.azureStorage.path }}" + {{- else if eq $.Values.distStorage.type "gcp" }} +
dist: "dremiogcs:///{{ required "GCS bucket name required" $.Values.distStorage.gcp.bucketName }}{{ required "GCS bucket path required" $.Values.distStorage.gcp.path }}" + {{- else -}} + {{ fail "Unrecognized distStorage type." }} + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" + {{- end }} +} + +services: { + # Which services run is controlled via command line options passed in + # while starting the services via Kubernetes. Updating the values listed below will not + # impact what is running: + # - coordinator.enabled + # - coordinator.master.enabled + # - coordinator.master.embedded-zookeeper.enabled + # - executor.enabled + # + # Other service parameters can be customized via this file. + + {{- if and $.Values.executor.cloudCache.enabled (eq $.Values.executor.cloudCache.enabled true) }} + executor: { + cache: { + path.db: "/opt/dremio/cloudcache/c0" + pctquota.db: 100 + + path.fs: ["/opt/dremio/cloudcache/c0"] + pctquota.fs: [100] + ensurefreespace.fs: [0] + {{ range $index, $_ := rest $.Values.executor.cloudCache.volumes -}} + path.fs += "/opt/dremio/cloudcache/c{{ add1 $index }}" + pctquota.fs += 100 + ensurefreespace.fs += 0 + {{- end }} + } + } + {{- end }} +} + +{{- if and $.Values.executor.cloudCache.enabled (ne $.Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage; it is turned off by default. + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible for distributed storage + dist.max.cache.space.percent: 100 +} +{{- end }} + +{{- if $.Values.coordinator.web.tls.enabled }} +services.coordinator.web.ssl.enabled: true +services.coordinator.web.ssl.auto-certificate.enabled: false +services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" +{{- end }} + +{{- if $.Values.coordinator.client.tls.enabled }} +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition.
+services.coordinator.client-endpoint.ssl.enabled: true +services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false +services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" +{{- end }} + +{{- if $.Values.coordinator.flight.tls.enabled }} +services.flight.ssl.enabled: true +services.flight.ssl.auto-certificate.enabled: false +services.flight.ssl.keyStore: "/opt/dremio/tls/flight.pkcs12" +{{- end }} diff --git a/charts/dremio_v2/config/hive2/README.md b/charts/dremio_v2/config/hive2/README.md new file mode 100644 index 00000000..982beef2 --- /dev/null +++ b/charts/dremio_v2/config/hive2/README.md @@ -0,0 +1,2 @@ +### Hive 2 Configuration Files +This directory is used to store Hive 2 configuration files to be deployed to Dremio.
\ No newline at end of file diff --git a/charts/dremio_v2/config/logback-access.xml b/charts/dremio_v2/config/logback-access.xml new file mode 100644 index 00000000..a00ae338 --- /dev/null +++ b/charts/dremio_v2/config/logback-access.xml @@ -0,0 +1,51 @@ + + + + + + + + + ${dremio.log.path}/access.log + + ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + combined + + + + + + + + + combined + + + + + + + diff --git a/charts/dremio_v2/config/logback-admin.xml b/charts/dremio_v2/config/logback-admin.xml new file mode 100644 index 00000000..b393d02b --- /dev/null +++ b/charts/dremio_v2/config/logback-admin.xml @@ -0,0 +1,67 @@ + + + + + + %msg%n%ex{0}%n + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + + %date{ISO8601} [%thread] %-5level %logger{30} - %msg%n + + + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + ${dremio.admin.log.path} + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + + + diff --git a/charts/dremio_v2/config/logback.xml b/charts/dremio_v2/config/logback.xml new file mode 100644 index 00000000..0ab3528b --- /dev/null +++ b/charts/dremio_v2/config/logback.xml @@ -0,0 +1,164 @@ + + + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + ${dremio.log.path}/server.log + + ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/metadata_refresh.log + + ${dremio.log.path}/archive/metadata_refresh.%d{yyyy-MM-dd}.log.gz + 30 + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/json/server.json + + ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.%i.json.gz + 30 + + 100MB + + + + + + {"timestamp": "%date{ISO8601}", "host":"${HOSTNAME}" } + thread + levelName + levelValue + logger + message + + + + + + + + ${dremio.log.path}/queries.json + + 
${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.%i.json.gz + 30 + + 100MB + + + + + %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${dremio.log.path}/hive.deprecated.function.warning.log + + ${dremio.log.path}/archive/hive.deprecated.function.warning.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + diff --git a/charts/dremio_v2/docs/README.md b/charts/dremio_v2/docs/README.md new file mode 100644 index 00000000..37c5cacd --- /dev/null +++ b/charts/dremio_v2/docs/README.md @@ -0,0 +1,14 @@ +# Dremio on Kubernetes Documentation + +* **Setup** + * [Important Setup Considerations](./setup/Important-Setup-Considerations.md) + * [Migrating Helm Chart Versions](./setup/Migrating-Helm-Chart-Versions.md) + * [Customizing Dremio Configuration](./setup/Customizing-Dremio-Configuration.md) + * [Setup Hive 2 and 3](./setup/Setup-Hive-2-and-3.md) + * [Custom Dremio Image](./setup/Custom-Dremio-Image.md) +* **Administration** + * [Dremio Administration](./administration/Dremio-Administration.md) + * [Scaling Coordinators and Executors](./administration/Scaling-Coordinators-and-Executors.md) + * [Upgrading Dremio](./administration/Upgrading-Dremio.md) + * [Viewing Logs](./administration/Viewing-Logs.md) +* [`Values.yaml` Reference](./Values-Reference.md) \ No newline at end of file diff --git a/charts/dremio_v2/docs/Values-Reference.md b/charts/dremio_v2/docs/Values-Reference.md new file mode 100644 index 00000000..510d40fa --- /dev/null +++ b/charts/dremio_v2/docs/Values-Reference.md @@ -0,0 +1,1611 @@ +# `Values.yaml` Reference + +🔎 To search this document for specific values, use dot-notation to search, i.e. `coordinator.volumeSize`. + +ℹ️ In all code examples, `[...]` denotes additional values that have been omitted. 
+ +## Top Level Values + +### Image Configuration + +#### `image` + +Type: String + +By default, the image is set to `dremio/dremio-oss`, the community edition of Dremio. + +The `image` refers to the location to retrieve the specific container image for Dremio. In some cases, the `image` value may vary in corporate environments where there may be a private container registry that is used. + +#### `imageTag` + +Type: String + +By default, the value is set to `latest`. + +It is **strongly** recommended to pin the version of Dremio that we are deploying by setting the `imageTag` to a precise version and not leave the value as latest. Since Dremio versions are not backwards compatible, leaving it as latest may automatically upgrade dremio during pod creation. + +#### `imagePullSecrets` + +Type: Array + +By default, this value is not set. + +In some environments, an internal mirror may be used that requires authentication. For enterprise users, you may need to specify the `imagePullSecret` for the Kubernetes cluster to have access to the Dremio enterprise image. Please refer to the documentation [Pull an Image from a Private Repository](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) provided by Kubernetes on how to create an image pull secret. + + +### Kubernetes Service Account + +#### `serviceAccount` + +Type: String + +By default, this value is not set and will use the default service account configured for the Kubernetes cluster. + +This value is independently overridable in each section ([`coordinator`](#coordinator), [`executor`](#executor), [`zookeeper`](#zookeeper)). + +More Info: See the [Service Accounts](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) documentation for Kubernetes. + +### Storage Configuration + +#### `storageClass` + +Type: String + +By default, this value is not set and will use the default storage class configured for the Kubernetes cluster. 
+ +Storage class has a direct impact on the performance of the Dremio cluster. Optionally set this value to use the same storage class for all persistent volumes created. This value is independently overridable in each section ([`coordinator`](#coordinator), [`executor`](#executor), [`zookeeper`](#zookeeper)). + +More Info: See the [Storage Classes](https://kubernetes.io/docs/concepts/storage/storage-classes/) documentation for Kubernetes. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are set to empty. These values are independently overridable in each section ([`coordinator`](#coordinator), [`executor`](#executor), [`zookeeper`](#zookeeper)). + +#### `annotations` + +Type: Dictionary + +The annotations set at this root level are used by all `StatefulSet` resources unless overridden in their respective configuration sections. + +For example, you can set annotations as follows: + +```yaml +annotations: + example-annotation-one: "example-value-one" + example-annotation-two: "example-value-two" +[...] +``` + +More Info: See the [Annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) documentation for Kubernetes. + +#### `podAnnotations` + +Type: Dictionary + +The pod annotations set at this root level are used by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set pod annotations as follows: + +```yaml +podAnnotations: + example-pod-annotation-one: "example-value-one" + example-pod-annotation-two: "example-value-two" +[...] +``` + +More Info: See the [Annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) documentation for Kubernetes. + +#### `labels` + +Type: Dictionary + +The labels set at this root level are used by all `StatefulSet` resources unless overridden in their respective configuration sections. 
+ +For example, you can set labels as follows: + +```yaml +labels: + example-label-one: "example-value-one" + example-label-two: "example-value-two" +[...] +``` + +More Info: See the [Labels and Selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) documentation for Kubernetes. + +#### `podLabels` + +Type: Dictionary + +The pod labels set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set pod labels as follows: + +```yaml +podLabels: + example-pod-label-one: "example-value-one" + example-pod-label-two: "example-value-two" +[...] +``` + +More Info: See the [Labels and Selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) documentation for Kubernetes. + +#### `nodeSelector` + +Type: Dictionary + +The node selectors set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set the node selector to select nodes that have a label `diskType` of value `ssd` as follows: + +```yaml +nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: See the [nodeSelector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) section of Assigning Pods to Nodes documentation for Kubernetes. + +#### `tolerations` + +Type: Array + +The tolerations set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, if there is a node with the taint `example-key=example-value:NoSchedule`, you can set the tolerations to allow the pod to be scheduled as follows: + +```yaml +tolerations: +- key: "example-key" + operator: "Exists" + effect: "NoSchedule" +[...] +``` + +More Info: See the [Taints and Tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) documentation for Kubernetes. 
+ +### Dremio Configuration + +#### `coordinator` + +Type: Dictionary + +This section controls the deployment of coordinator instance(s). See the [Coordinator Values](#coordinator-values) section. + +#### `executor` + +Type: Dictionary + +This section controls the deployment of executor instance(s). See the [Executor Values](#executor-values) section. + +#### `distStorage` + +Type: Dictionary + +This section controls Dremio's distributed storage configuration. See the [Distributed Storage Values](#distributed-storage-values) section. + +#### `service` + +Type: Dictionary + +This section controls Dremio's Kubernetes service which is exposed to end users of Dremio. See the [Service Values](#service-values) section. + +### Zookeeper Configuration + +#### `zookeeper` + +Type: Dictionary + +This section controls the deployment of Zookeeper in Kubernetes. See the [Zookeeper Values](#zookeeper-values) section. + +### Advanced Configuration + +#### `extraStartParams` + +Type: String + +By default, this value is not set. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +#### `extraInitContainers` + +Type: String + +By default, this value is not set. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's pods. The value specified here may reference values specified in the built-in `Values` object in Helm. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +#### `extraVolumes` + +Type: Array + +By default, this value is not set.
+ +This value controls additional volumes that are attached to the Dremio's pods. This specifies additional volumes that should be mountable to the containers in Dremio's pods. This value is typically used in conjunction with `extraVolumeMounts`. + +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio pods, you can specify the following: + +```yaml +extraVolumes: +- name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +#### `extraVolumeMounts` + +Type: Array + +By default, this value is not set. + +This value controls the additional volumes that should be mounted to the Dremio containers and the paths that each volume should be mounted at. This value is typically used in conjunction with `extraVolumes`. + +For example, if you have set the above `extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +extraVolumeMounts: +- name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +## Coordinator Values + +### General Configuration + +#### `coordinator.cpu` & `coordinator.memory` + +Type: Integer + +By default, the value of `cpu` is `15` and the value of memory is `122800` (MB). + +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each coordinator instance for the purposes of scheduling a coordinator to a specific node in the Kubernetes cluster. + +***Note***: While the values specified are not upper bounds, the value of `memory` specified here is used by the chart to calculate the allocation of heap and direct memory used by Dremio. + +#### `coordinator.count` + +Type: Integer + +By default, the value is set to `0`. + +Increasing this number controls the *secondary* coordinators that are launched as part of the deployment. 
Regardless of this value, at minimum one master coordinator is launched as part of the deployment. The total number of coordinator instances launched will always be `coordinator.count + 1`. + +#### `coordinator.volumeSize` + +Type: String + +By default, the value is set to `128Gi`. + +The coordinator volume is used to store the RocksDB KV store and requires a performant disk. In most hosted Kubernetes environments, disk performance is determined by the size of the volume. + +### Web UI + +#### `coordinator.web.port` + +Type: Integer + +By default, the value is set to `9047`. + +To change the port that Dremio listens on, change the port to a desired value. The valid range of ports is 1 to 65535. + +#### `coordinator.web.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`. + +To enable TLS on the web UI, set this value to `true`. Also, provide a value for `coordinator.web.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.web.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-ui`. + +This value is ignored if `coordinator.web.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the web UI. + +For example, to have TLS enabled for the web UI using a certificate created called `dremio-tls-secret-ui`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + web: + tls: + enabled: true + secret: dremio-tls-secret-ui +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect.
+ +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + +### Client (JDBC/ODBC) + +#### `coordinator.client.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`. This is an **enterprise only feature** and should not be set to true when using a community edition of Dremio. + +To enable TLS on the client ODBC/JDBC port, set this value to `true`. Also, provide a value for `coordinator.client.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.client.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-client`. + +This value is ignored if `coordinator.client.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the client JDBC/ODBC connections. + +For example, to have TLS enabled for the client JDBC/ODBC connections using a certificate created called `dremio-tls-secret-client`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + client: + tls: + enabled: true + secret: dremio-tls-secret-client +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect. + +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + +### Flight + +#### `coordinator.flight.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`. + +To enable TLS on the Flight port, set this value to `true`. 
Also, provide a value for `coordinator.flight.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.flight.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-flight`. + +This value is ignored if `coordinator.flight.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the Flight connections. + +For example, to have TLS enabled for the Flight connections using a certificate created called `dremio-tls-secret-flight`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + flight: + tls: + enabled: true + secret: dremio-tls-secret-flight +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect. + +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `coordinator.annotations` + +Type: Dictionary + +The annotations set are used by all coordinator `StatefulSet` resources. + +For example, you can set annotations as follows: + +```yaml +coordinator: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] 
+``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `coordinator.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set pod annotations as follows: + +```yaml +coordinator: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `coordinator.labels` + +Type: Dictionary + +The labels set are used by all coordinator `StatefulSet` resources. + +For example, you can set labels as follows: + +```yaml +coordinator: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `coordinator.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set pod labels as follows: + +```yaml +coordinator: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. + +#### `coordinator.nodeSelector` + +Type: Dictionary + +The node selectors set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set node selectors as follows: + +```yaml +coordinator: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `coordinator.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`. + +Storage class has a direct impact on the performance of the Dremio cluster.
On the master coordinator node, RocksDB is stored on the persistent volume created with this storage class. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + +#### `coordinator.serviceAccount` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `serviceAccount`. + +More Info: Refer to the [`serviceAccount`](#serviceaccount) section of this reference. + +#### `coordinator.extraStartParams` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraStartParams`. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +coordinator: + [...] + extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +More Info: Refer to the [`extraStartParams`](#extrastartparams) section of this reference. + +#### `coordinator.extraInitContainers` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraInitContainers`. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's coordinator pods. The value specified here may reference values specified in the `values.yaml` file. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +coordinator: + [...] + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +More Info: Refer to the [`extraInitContainers`](#extrainitcontainers) section of this reference. + +#### `coordinator.extraVolumes` + +Type: Array + +By default, this value is not set. 
If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumes`. + +This value controls additional volumes that are attached to the Dremio coordinator pod. This specifies additional volumes that should be mountable to the containers in the Dremio coordinator pod. This value is typically used in conjunction with `coordinator.extraVolumeMounts`. + +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio coordinator pods, you can specify the following: + +```yaml +coordinator: + [...] + extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +More Info: Refer to the [`extraVolumes`](#extravolumes) section of this reference. + +#### `coordinator.extraVolumeMounts` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumeMounts`. + +This value controls the additional volumes that should be mounted to the Dremio coordinator container and the paths that the volume should be mounted at. This value is typically used in conjunction with `coordinator.extraVolumes`. + +For example, if you have set the above `coordinator.extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +coordinator: + [...] + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +More Info: Refer to the [`extraVolumeMounts`](#extravolumemounts) section of this reference. + +## Executor Values + +### General Configuration + +#### `executor.cpu` & `executor.memory` + +Type: Integer + +By default, the value of `cpu` is `15` and the value of memory is `122800` (MB). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. 
+ +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each executor instance for the purposes of scheduling an executor to a specific node in the Kubernetes cluster. + +***Note***: While the values specified are not upper bounds, the value of `memory` specified here is used by the chart to calculate the allocation of heap and direct memory used by Dremio. + +#### `executor.engines` + +Type: Array + +By default, the value is `["default"]`. + +By adding additional values to this list, additional sets of executors are launched. By default, each set of executors will start with `executor.count` number of pods. See the [Per-Engine Configuration](#per-engine-configuration) section of this reference to customize the number of executors that are started. + +#### `executor.count` + +Type: Integer + +By default, the value is set to `3`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +Increasing this number controls the number of executors that are launched as part of the engine. Without per-engine overrides, the total number of executor pods started is calculated as the `length(executor.engines) * executor.count`. + +#### `executor.volumeSize` + +Type: String + +By default, the value is set to `128Gi`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +The executor volume is used to store results of queries run. If the `distStorage.type` is set to `local`, additional resources such as accelerations may be stored in the volume. In most hosted Kubernetes environments, disk performance is determined by the size of the volume. + +### Columnar Cloud Cache (C3) Configuration + +#### `executor.cloudCache.enabled` + +Type: Boolean + +By default, the value is set to `true`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section.
+ +Columnar cloud cache (C3) is enabled by default on executors. To turn off cloud cache, set this value to `false`. + +#### `executor.cloudCache.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from `executor.storageClass` or its parent value `storageClass`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +C3 is designed for usage with local NVMe storage devices. If available, it is recommended to setup a [local storage provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner/blob/master/docs/getting-started.md) to allow Dremio to utilize local NVMe storage on the Kubernetes nodes. + +#### `executor.cloudCache.volumes` + +Type: Array + +By default, the value is set to `[{size: 100Gi}]`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +By specifying more than one item in the list, additional volumes are provisioned for C3. Each volume must specify a `size` and optionally a `name` and custom `storageClass`. If the volume omits the `storageClass`, the value of `executor.cloudCache.storageClass` or its parent values are used. + +For example, if the Kubernetes nodes that are provisioned have three local NVMe storage devices available, then we can create three C3 cache volumes each using a different `size` and combination of custom `name` and `storageClass` values: + +```yaml +executor: + [...] + cloudCache: + volumes: + - size: 300Gi + - name: "executor-c3-0" + size: 100Gi + storageClass: "local-nvme" + - size: 50Gi + storageClass: "local-nvme" +[...] +``` + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. 
For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `executor.annotations` + +Type: Dictionary + +The annotations set are used by all executor `StatefulSet` resources. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set annotations as follows: + +```yaml +executor: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `executor.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set pod annotations as follows: + +```yaml +executor: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `executor.labels` + +Type: Dictionary + +The labels set are used by all executor `StatefulSet` resources. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set labels as follows: + +```yaml +executor: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `executor.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set pod labels as follows: + +```yaml +executor: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] 
+``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. + +#### `executor.nodeSelector` + +Type: Array + +The node selectors set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set node selectors as follows: + +```yaml +executor: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `executor.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + +#### `executor.serviceAccount` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `serviceAccount`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +More Info: Refer to the [`serviceAccount`](#serviceaccount) section of this reference. + +#### `executor.extraStartParams` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraStartParams`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +executor: + [...]
+ extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +More Info: Refer to the [`extraStartParams`](#extrastartparams) section of this reference. + +#### `executor.extraInitContainers` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraInitContainers`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's executor pods. The value specified here may reference values specified in the `values.yaml` file. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +executor: + [...] + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +More Info: Refer to the [`extraInitContainers`](#extrainitcontainers) section of this reference. + +#### `executor.extraVolumes` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumes`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional volumes that are attached to the Dremio executor pod. This specifies additional volumes that should be mountable to the containers in the Dremio executor pod. This value is typically used in conjunction with `executor.extraVolumeMounts`. + +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio executor pods, you can specify the following: + +```yaml +executor: + [...]
+ extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +More Info: Refer to the [`extraVolumes`](#extravolumes) section of this reference. + +#### `executor.extraVolumeMounts` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumeMounts`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls the additional volumes that should be mounted to the Dremio executor container and the paths that the volume should be mounted at. This value is typically used in conjunction with `executor.extraVolumes`. + +For example, if you have set the above `executor.extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +executor: + [...] + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +More Info: Refer to the [`extraVolumeMounts`](#extravolumemounts) section of this reference. + +### Per-Engine Configuration + +#### `executor.engineOverride.<engine-name>` + +Type: Dictionary + +By default, this value is not set. + +Engine overrides use the name of the engine provided in the `executor.engines` array to allow customization on a per-engine basis. The value of `<engine-name>` should be the name of an engine provided in `executor.engines`. + +For example, the following shows all the supported override values being set (which override the shared values from `executor`): + +```yaml +executor: + [...]
+ engineOverride: + <engine-name>: + cpu: 4 + memory: 144800 + + count: 2 + + annotations: + example-annotation-one: "example-value-one" + example-annotation-two: "example-value-two" + podAnnotations: + example-pod-annotation-one: "example-value-one" + example-pod-annotation-two: "example-value-two" + labels: + example-label-one: "example-value-one" + example-label-two: "example-value-two" + podLabels: + example-pod-label-one: "example-value-one" + example-pod-label-two: "example-value-two" + nodeSelector: + diskType: "ssd" + tolerations: + - key: "example-key" + operator: "Exists" + effect: "NoSchedule" + + serviceAccount: "internal" + + extraStartParams: >- + -DsomeTestKey=someValue + + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] + + extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files + + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" + + volumeSize: 50Gi + storageClass: "managed-premium" + + cloudCache: + enabled: true + + storageClass: "local-nvme" + + volumes: + - size: 300Gi + - name: "executor-c3-0" + size: 100Gi + storageClass: "local-nvme" + - size: 50Gi + storageClass: "local-nvme" +[...] +``` + +#### `executor.engineOverride.<engine-name>.volumeClaimName` + +Type: String + +By default, this value is not set. + +When set, this will be the volume claim name used for the persistent volume by an engine. Unless moving from an old Helm chart with existing volume claims that must be retained, this value should not be used. This value should only be used for the `default` engine as persistent volume claims are pod name dependent as well and non-`default` engines will not match the pod name required. + +For example, if moving from an old Helm chart that used `dremio-executor-volume`, you can continue to use the volumes for the `default` engine by specifying the following: + +```yaml +executor: + [...]
+ engineOverride: + default: + volumeClaimName: dremio-executor-volume +[...] +``` + +## Distributed Storage Values + +### General Configuration + +#### `distStorage.type` + +Type: String + +By default, this value is set to `local`. + +The valid values for `distStorage.type` are `local` (not recommended), `aws`, `azure`, `azureStorage` or `gcp`. For specific configuration values for each, see the associated sections: + +* `aws` (S3): [AWS S3](#aws-s3) +* `azure` (Azure ADLS Gen 1): [Azure ADLS Gen 1](#azure-adls-gen-1) +* `azureStorage` (Azure Storage Gen2): [Azure Storage Gen2](#azure-storage-gen2) +* `gcp` (Google Cloud Storage): [Google Cloud Storage](#google-cloud-storage) + +For example, to use AWS S3 as the distributed storage location, you can specify the following: + +```yaml +distStorage: + [...] + type: "aws" +[...] +``` + +### AWS S3 + +#### `distStorage.aws.bucketName` + +Type: String + +By default, this value is set to `AWS Bucket Name` and must be changed to a valid bucket name. + +Specify a valid bucket name that Dremio has write access to. For the required permissions, please see the [Amazon S3](http://docs.dremio.com/deployment/dist-store-config.html#amazon-s3) section of the Configuration Distributed Storage documentation for Dremio. + +#### `distStorage.aws.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided bucket. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### `distStorage.aws.authentication` + +Type: String + +The valid values for `distStorage.aws.authentication` are: +* `metadata` (default) - Dremio will attempt to use the instance profile of the EKS node to authenticate to the S3 bucket. +* `accessKeySecret` - The values `distStorage.aws.credentials.accessKey` and `distStorage.aws.credentials.secret` are used to authenticate. 
+* `awsProfile` - The `distStorage.aws.credentials.awsProfileName` value is used to authenticate. + +***Note***: Dremio does not support service account IAM roles on EKS. + +#### Credentials for AWS S3 + +When providing credentials, both `distStorage.aws.credentials.accessKey` and `distStorage.aws.credentials.secret` should be provided. + +For example, the following `distStorage` configuration may be used: + +```yaml +distStorage: + [...] + aws: + bucketName: "demo.dremio.com" + path: "/" + authentication: "accessKeySecret" + credentials: + accessKey: "SOME_VALID_KEY" + secret: "SOME_VALID_SECRET" +[...] +``` + +##### `distStorage.aws.credentials.accessKey` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate via access key and secret, provide a valid access key value. + +##### `distStorage.aws.credentials.secret` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate via access key and secret, provide a valid secret value. + +#### AWS Profile Authentication for AWS S3 + +When using `awsProfile` for `distStorage.aws.authentication`, a folder containing a valid AWS `credentials` file needs to be mounted to `/opt/dremio/aws`. + +If using a profile that uses the `credential_process` option, the target process needs to be mounted to the location specified in the `credentials` file. The script must also be marked as executable. If using a ConfigMap along with the `defaultMode` option, use a decimal value `511` which corresponds to octal `0777`. + +##### `distStorage.aws.credentials.awsProfileName` + +Type: String + +By default, this value is not set. + +Specifies the AWS profile name to use for AWS profile authentication. + +#### Advanced Configuration for AWS S3 + +##### `distStorage.aws.extraProperties` + +Type: String + +By default, this value is not set. + +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source.
+ +For example, to set the S3 endpoint, you can do the following: + +```yaml +distStorage: + aws: + [...] + extraProperties: | + <property> + <name>fs.s3a.endpoint</name> + <value>s3.us-west-2.amazonaws.com</value> + </property> +[...] +``` + +### Azure ADLS Gen 1 + +#### `distStorage.azure.datalakeStoreName` + +Type: String + +By default, this value is set to `Azure Datalake Store Name` and must be changed to a valid ADLS datalake store name. + +Specify a valid datalake store name that Dremio has write access to. For the required permissions, please see the [Azure Configuration](http://docs.dremio.com/data-sources/azure-data-lake-store.html#azure-configuration) section of the Azure Data Lake Storage Gen1 documentation for Dremio. + +#### `distStorage.azure.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided datalake store. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### Credentials for Azure ADLS Gen 1 + +##### `distStorage.azure.credentials.applicationId` + +Type: String + +By default, this value is set to `Azure Application ID` and must be changed to a valid Azure Application ID. + +For Dremio to authenticate to the datalake store, provide a valid application ID. + +##### `distStorage.azure.credentials.secret` + +Type: String + +By default, this value is set to `Azure Application Secret` and must be changed to a valid Azure Application Secret. + +For Dremio to authenticate to the datalake store, provide a valid secret value. + +##### `distStorage.azure.credentials.oauth2Endpoint` + +Type: String + +By default, this value is set to `Azure OAuth2 Endpoint` and must be changed to a valid Azure OAuth2 endpoint. + +For Dremio to authenticate to the datalake store, provide a valid OAuth2 endpoint. + +#### Advanced Configuration for Azure ADLS Gen 1 + +##### `distStorage.azure.extraProperties` + +Type: String + +By default, this value is not set.
+ +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +For example, to disable the cache (this value should not be set in production), you can do the following: + +```yaml +distStorage: + azure: + [...] + extraProperties: | + <property> + <name>fs.adl.impl.disable.cache</name> + <value>true</value> + </property> +``` + +### Azure Storage Gen2 + +#### `distStorage.azureStorage.accountName` + +Type: String + +By default, this value is set to `Azure Storage Account Name` and must be changed to a valid Azure Storage account name. + +Specify a valid Azure Storage account name that Dremio has write access to. For the required permissions, please see the [Granting Azure Data Lake Store access](https://docs.dremio.com/data-sources/azure-data-lake-store.html#granting-azure-data-lake-store-access) section of the Azure Data Lake Storage Gen1 documentation for Dremio. + +#### `distStorage.azureStorage.filesystem` + +Type: String + +By default, this value is set to `Azure Storage Account Blob Container` and must be changed to a valid Azure Storage blob container. + +Specify a valid Azure Storage blob container that Dremio has write access to. + +#### `distStorage.azureStorage.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided Azure Storage blob container. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### Credentials for Azure Storage Gen2 + +##### `distStorage.azureStorage.credentials.accessKey` + +Type: String + +By default, this value is set to `Azure Storage Account Access Key` and must be changed to a valid access key. + +For Dremio to authenticate to the provided Azure Storage blob container, provide a valid access key. + +#### Advanced Configuration for Azure Storage Gen2 + +##### `distStorage.azureStorage.extraProperties` + +Type: String + +By default, this value is not set.
+ +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +For example, to disable SSL connections (this value should not be set in production), you can do the following: + +```yaml +distStorage: + azureStorage: + [...] + extraProperties: | + <property> + <name>dremio.azure.secure</name> + <value>false</value> + </property> +[...] +``` + +### Google Cloud Storage + +#### `distStorage.gcp.bucketName` + +Type: String + +By default, this value is set to `GCS Bucket Name` and must be changed to a valid bucket name. + +Specify a valid bucket name that Dremio has write access to. + +#### `distStorage.gcp.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided bucket. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### `distStorage.gcp.authentication` + +Type: String + +By default, this value is set to `auto`. + +The valid values for `distStorage.gcp.authentication` are `auto` or `serviceAccountKeys`. When set to `auto`, Dremio will use Google Application Default Credentials to authenticate to the GCS bucket. + +***Note***: On GKE clusters with [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) enabled, we recommend creating a Kubernetes [Service Account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) and setting [`serviceAccount`](#serviceaccount) to reference the Kubernetes service account. If specifying different service accounts for coordinators/executors, ensure that all service accounts have access to the GCS bucket.
+ +#### Credentials for GCP GCS + +When set to `serviceAccountKeys`, the values `distStorage.gcp.credentials.projectId`, `distStorage.gcp.credentials.clientId`, `distStorage.gcp.credentials.clientEmail`, `distStorage.gcp.credentials.privateKeyId` and `distStorage.gcp.credentials.privateKey` are used to authenticate to the GCS bucket. + +For example, to use service account credential keys, you can do the following: + +```yaml +distStorage: + [...] + gcp: + bucketName: "demo-bucket.dremio.com" + path: "/" + authentication: "serviceAccountKeys" + credentials: + projectId: "dremio-demo-project" + clientId: "000000000" + clientEmail: "demo-service-account@dremio-demo-project.iam.gserviceaccount.com" + privateKeyId: "0000000000000000000000000000000000000000" + privateKey: |- + -----BEGIN PRIVATE KEY-----\nPRIVATE KEY\n-----END PRIVATE KEY-----\n +[...] +``` + +Alternatively, for example, to use a Kubernetes Service Account on GKE with Workload Identity: + +```yaml +serviceAccount: "k8s-service-account-name" +distStorage: + [...] + gcp: + bucketName: "demo-bucket.dremio.com" + path: "/" + authentication: "auto" +[...] +``` + +##### `distStorage.gcp.credentials.projectId` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate with service account credential keys, provide the project ID for the service account. + +##### `distStorage.gcp.credentials.clientId` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate with service account credential keys, provide the client ID for the service account. + +##### `distStorage.gcp.credentials.clientEmail` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate with service account credential keys, provide the client email for the service account. + +##### `distStorage.gcp.credentials.privateKeyId` + +Type: String + +By default, this value is not set. 
+ +For Dremio to authenticate with service account credential keys, provide the private key ID for the service account. + +##### `distStorage.gcp.credentials.privateKey` + +Type: String + +By default, this value has a partial snippet of a private key. + +For Dremio to authenticate with service account credential keys, provide the private key for the service account. Ensure this value is provided in one line. You can directly copy the value as-is from the credentials JSON file, including any special characters, but without surrounding quotes. + +#### Advanced Configuration for GCP GCS + +##### `distStorage.gcp.extraProperties` + +Type: String + +By default, this value is not set. + +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +## Service Values + +### General Configuration + +#### `service.type` + +Type: String + +By default, this value is set to `LoadBalancer`. + +In some environments, a `LoadBalancer` may not be available. You may alternatively set the type to `ClusterIP` for cluster-only usage of Dremio or `NodePort` to make the service available via the port on the Kubernetes node. + +For example, to make Dremio only accessible in the Kubernetes cluster, you can do the following: + +```yaml +service: + [...] + type: ClusterIP +[...] +``` + +#### `service.sessionAffinity` + +Type: Boolean + +By default, this value is not set, which defaults to `false`. + +To enable session affinity, set this value to `ClientIP`. Session affinity is critical for the web UI when `coordinator.count` is greater than 0. + +If utilizing Flight, please see [Important Setup Considerations](https://github.com/dremio/dremio-cloud-tools/blob/master/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md) for more information about enabling session affinity. + +### Annotations and Labels + +By default, these values are not set.
If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `service.annotations` + +Type: Dictionary + +The annotations set are used by the `Service` resource. + +For example, you can set annotations as follows: + +```yaml +service: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `service.labels` + +Type: Dictionary + +The labels set are used by the `Service` resource. + +For example, you can set labels as follows: + +```yaml +service: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +### Load Balancer + +#### `service.internalLoadBalancer` + +Type: Boolean + +By default, this value is not set, which defaults to `false`. + +When enabling this property, additional annotations are added to the `Service` for using an internal IP for the load balancer. Specifically, the following annotations are added which provide support for AWS, AKS, and GKE load balancers: + +- `service.beta.kubernetes.io/azure-load-balancer-internal: "true"` +- `cloud.google.com/load-balancer-type: "Internal"` +- `service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0` + +If these values are not applicable for your Kubernetes cluster, use the [`service.annotations`](#serviceannotations) value to provide a custom annotation that applies to your load balancer. + +#### `service.loadBalancerIP` + +Type: String + +By default, this value is not set. + +When setting this property, the load balancer attempts to use the provided IP address instead of dynamically allocating one. This IP address should be a static IP address that is usable by Kubernetes.
+ +## Zookeeper Values + +### Image Configuration + +#### `zookeeper.image` + +Type: String + +By default, the value is set to `k8s.gcr.io/kubernetes-zookeeper`. + +The `image` refers to the location to retrieve the specific container image for Zookeeper. In some cases, the `zookeeper.image` value may vary in corporate environments where there may be a private container registry that is used. + +#### `zookeeper.imageTag` + +Type: String + +By default, the value is set to `1.0-3.4.10`. + +The version of Zookeeper set has been validated by Dremio to work with the Dremio software. Changing this version is not recommended unless the tag is different due to a private container registry name difference. + +### General Configuration + +#### `zookeeper.cpu` & `zookeeper.memory` + +Type: Integer + +By default, the value of `cpu` is `0.5` and the value of memory is `1024` (MB). + +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each Zookeeper instance for the purposes of scheduling a Zookeeper to a specific node in the Kubernetes cluster. + +#### `zookeeper.count` + +Type: Integer + +By default, the value is set to `3`. + +This number sets the number of instances of Zookeeper to deploy. It is recommended to have a minimum of 3 to maintain a quorum. Changing the value below 3 may cause instability in the cluster. + +#### `zookeeper.volumeSize` + +Type: String + +By default, the value is set to `10Gi`. + +The Zookeeper volume is used for the WAL (Write Ahead Log) used by Zookeeper in the event of a crash. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values.
+ +#### `zookeeper.annotations` + +Type: Dictionary + +The annotations set are used by the Zookeeper `StatefulSet` resource. + +For example, you can set annotations as follows: + +```yaml +zookeeper: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `zookeeper.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. + +For example, you can set pod annotations as follows: + +```yaml +zookeeper: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `zookeeper.labels` + +Type: Dictionary + +The labels set are used by the Zookeeper `StatefulSet` . + +For example, you can set labels as follows: + +```yaml +zookeeper: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `zookeeper.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. + +For example, you can set pod labels as follows: + +```yaml +zookeeper: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. + +#### `zookeeper.nodeSelector` + +Type: Array + +The node selectors set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. + +For example, you can set node selectors as follows: + +```yaml +zookeeper: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `zookeeper.storageClass` + +Type: String + +By default, this value is not set. 
If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`. + +Storage class has an impact on the performance of the Zookeeper instances when writing the WAL and reading back data in the event of a crash. A more performant storage class may impact recovery times in the event of such a crash. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + +#### `zookeeper.serviceAccount` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `serviceAccount`. + +More Info: Refer to the [`serviceAccount`](#serviceaccount) section of this reference. diff --git a/charts/dremio_v2/docs/administration/Dremio-Administration.md b/charts/dremio_v2/docs/administration/Dremio-Administration.md new file mode 100644 index 00000000..5499cc4c --- /dev/null +++ b/charts/dremio_v2/docs/administration/Dremio-Administration.md @@ -0,0 +1,47 @@ +# Dremio Administration + +Administration commands restore, cleanup and set-password in dremio-admin needs to be run when the Dremio cluster is not running. So, before running these commands, you need to shutdown the Dremio cluster. + +As part of the Helm chart, we support invoking the `dremio-admin` commands via a `dremio-admin` pod. Consult the [Admin CLI](https://docs.dremio.com/advanced-administration/dremio-admin-cli.html) documentation for Dremio for a complete list of `dremio-admin` commands that can be invoked. + +**Starting Dremio Admin Pod** + +The `dremio-admin` pod is created via the Helm chart. During this process, Dremio will become unavailable to end users as the other pods are shutdown during this process. 
+ +To invoke the `dremio-admin` pod, use the following Helm command: + +```bash +$ helm upgrade <release-name> dremio_v2 --reuse-values --set DremioAdmin=true +``` + +**Stopping Dremio Admin Pod** + +To stop the `dremio-admin` pod and restart the other Dremio pods, use the following Helm command: + +```bash +$ helm upgrade <release-name> dremio_v2 --reuse-values --set DremioAdmin=false +``` + +**Connecting to the Dremio Admin Pod** + +Once you have started the `dremio-admin` pod, you can use the following command to access the pod: + +```bash +$ kubectl exec -it dremio-admin -- bash +``` + +The above command will connect you to the `dremio-admin` pod. Once there, you can invoke the `dremio-admin` commands normally from within the pod. + +**Copying Files** + +To copy contents from the `dremio-admin` pod, you can use the following command: + +```bash +$ kubectl cp dremio-admin:<path-in-pod> <local-path> +``` + +For example, to copy the contents of the Dremio `db` directory to a `db_backup` directory on your local machine, you can do the following: + +```bash +$ kubectl cp dremio-admin:data/db db_backup +``` diff --git a/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md b/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md new file mode 100644 index 00000000..c5b47029 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md @@ -0,0 +1,47 @@ +# Scaling Coordinators and Executors + +**Temporarily Scaling Coordinators and Executors** + +*Coordinators*: To temporarily scale the coordinator nodes that you have, modify the number of replicas for the `dremio-coordinator` StatefulSet using the following command. + +```bash +$ kubectl scale statefulsets dremio-coordinator --replicas=<number> +``` + +This number should represent the number of *secondary* coordinators that you want. Setting this number to zero will remove all secondary coordinators and leave a single master coordinator. 
+ +*Executors*: To temporarily scale the number of executors, locate the StatefulSet for the engine you wish to scale. + +To see the StatefulSets that exist, use the following command: + +```bash +$ kubectl get statefulsets +``` + +Then, to scale a specific engine, modify the number of replicas for the associated StatefulSet using the following command: + +```bash +$ kubectl scale statefulsets <statefulset-name> --replicas=<number> +``` + +**Permanently Scaling Coordinators and Executors** + +1. Get the name of the Helm release. In the example below, the release name is `dremio`: + +```bash +$ helm list +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +dremio helm-demo 1 2020-08-10 08:45:20.038011 -0700 PDT deployed dremio-2.0.0 +``` + +Adding additional resources should be done by modifying your `values.local.yaml` file. + +* To modify the number of secondary coordinators, modify the value `coordinator.count` to be greater than 0. +* To modify the number of executors, modify the `executor.count`. If you have more than one engine and wish to scale a specific engine, see the [`executor.engineOverride`](../Values-Reference.md#executorengineoverride) section of the `Values.yaml` Reference documentation. + +Once you have made the appropriate customizations, run the following command to update your deployment with the changes: + +```bash +$ helm upgrade <release-name> dremio_v2 -f values.local.yaml +``` + diff --git a/charts/dremio_v2/docs/administration/Upgrading-Dremio.md b/charts/dremio_v2/docs/administration/Upgrading-Dremio.md new file mode 100644 index 00000000..47acd446 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Upgrading-Dremio.md @@ -0,0 +1,33 @@ +# Upgrading Dremio + +1. Ensure that you have completed a backup of Dremio. See the [Dremio Administration](./Dremio-Administration.md) documentation on how to access `dremio-admin` commands to complete a backup prior to upgrading. +2. Update the Dremio `imageTag` value in your values.yaml file. 
+ + For example, to update to `4.7.0`, update the tag to the following: + +```yaml +imageTag: 4.7.0 +[...] +``` + +3. Get the name of the Helm release. In the example below, the release name is `dremio`. + +```bash +$ helm list +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +dremio helm-demo 1 2020-08-10 08:45:20.038011 -0700 PDT deployed dremio-2.0.0 +``` + +4. Upgrade the deployment via the `helm upgrade` command: + +```bash +$ helm upgrade <release-name> dremio_v2 -f values.local.yaml +``` + +The existing pods will be terminated and new pods will be created with the new image. You can monitor the status of the pods by running: + +```bash +$ kubectl get pods +``` + +Once all the pods are restarted and running, your Dremio cluster is upgraded. \ No newline at end of file diff --git a/charts/dremio_v2/docs/administration/Viewing-Logs.md b/charts/dremio_v2/docs/administration/Viewing-Logs.md new file mode 100644 index 00000000..bb07cf52 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Viewing-Logs.md @@ -0,0 +1,26 @@ +# Viewing Logs + +**Dremio Logs** + +By default, Dremio logs are written to the container's console. All the logs - `server.log`, `server.gc` and `access.log` - are written into the console simultaneously. + +You can view the logs using `kubectl logs`: + +```bash +$ kubectl logs <pod-name> +``` + +You can also tail the logs using the `-f` parameter for `kubectl logs`: + +```bash +$ kubectl logs -f <pod-name> +``` + +**Pod Initialization Logs** + +In some cases, if the Dremio cluster is failing to startup during the pod init phases, it may be necessary to view the logs generated by the init containers. 
Identify the pod that is failing to start, and use the following command to view the init container logs: + +```bash +$ kubectl logs -c +``` + diff --git a/charts/dremio_v2/docs/setup/Custom-Dremio-Image.md b/charts/dremio_v2/docs/setup/Custom-Dremio-Image.md new file mode 100644 index 00000000..444e297f --- /dev/null +++ b/charts/dremio_v2/docs/setup/Custom-Dremio-Image.md @@ -0,0 +1,41 @@ +# Custom Dremio Image + +You may occasionally need to customize Dremio by adding an additional [Dremio ARP connector](https://www.dremio.com/hub/) or other JAR(s). The recommended approach is to create a new container image for Dremio containing the customization. + +### Prerequisites +* Local machine with Docker installed +* Private container image repository accessible by your Kubernetes cluster + +To create a custom container image, create a new directory containing the additional JAR(s) that you wish to add along with a `Dockerfile` with the following contents: + +```dockerfile +FROM dremio/dremio-oss:15.0.0 +USER root + +# To copy multiple files, change the below two lines to the following: +# COPY /opt/dremio/jars/ +# RUN chown 1000:1000 /opt/dremio/jars/ /opt/dremio/jars/ +COPY /opt/dremio/jars/ +RUN chown 1000:1000 /opt/dremio/jars/ + +# For Dremio ARP connectors, you may need to copy file(s) to /opt/dremio/jars/3rdparty/ as well. Uncomment the following lines as appropriate: +# +# (1) For a single file, uncomment the below two lines. +# COPY /opt/dremio/jars/3rdparty/ +# RUN chown 1000:1000 /opt/dremio/jars/3rdparty/ +# +# (2) Or, to copy multiple files, uncomment the below two lines. +# COPY /opt/dremio/jars/3rdparty/ +# RUN chown 1000:1000 /opt/dremio/jars/3rdparty/ /opt/dremio/jars/3rdparty/ + +USER dremio +``` + +Ensure that you have set the desired base image tag (i.e., `dremio/dremio-oss:15.0.0`) and replaced the name(s) of the desired file(s) to add. 
To create the image, from within the folder with the `Dockerfile`, invoke the following commands: + +```bash +docker build . -t /: +docker push /: +``` + +Once you have your new container image built and pushed, update your `values.local.yaml` file to set the `image` and `imageTag` values as appropriate. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md b/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md new file mode 100644 index 00000000..f76819c5 --- /dev/null +++ b/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md @@ -0,0 +1,13 @@ +# Customizing Dremio Configuration + +Dremio configuration files used by the deployment are in the `config` directory. These files are propagated to all the pods in the cluster. + +To update the configuration used in the pods, run the Helm upgrade command: + +```bash +$ helm upgrade dremio_v2 -f values.local.yaml +``` + +To see all the configuration customizations, please see the [Customizing Configuration](https://docs.dremio.com/deployment/README-config.html) documentation for Dremio. + +For users who wish to setup a Hive 2/3 source, please see the [Setup Hive 2 and 3](./Setup-Hive-2-and-3.md) documentation. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md new file mode 100644 index 00000000..2281a309 --- /dev/null +++ b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md @@ -0,0 +1,17 @@ +# Important Setup Considerations + +As part of setting up a Dremio cluster on Kubernetes, there are a number of important considerations that we recommend you review before deploying your cluster. Some of these values have an impact on the performance of your cluster and should be adjusted to your needs. + +* `imageTag`: As part of setup, this value should be updated to reference the exact version of Dremio you wish to deploy, i.e. 
`4.7.0`. +* `distStorage.type`: By default, the `distStorage.type` is set to `local`. This **must** be changed prior to production use. We do not recommend users use local distributed storage as part of a production setup. +* `volumeSize` and `storageClass`: The size and type of volume used for Dremio has a direct impact on performance. In most Kubernetes providers, volume size has a direct impact on the performance in IOPS and read/write speeds. It is important to check your Kubernetes provider to determine how volume size impacts the performance of your disk. +* `executor.cloudCache.storageClass`: Dremio C3 was designed to be used with performant NVMe storage. By default, the chart utilizes the default storage class that is configured on the Kubernetes cluster. For the major Kubernetes providers, NVMe storage is often available on appropriately sized nodes. We recommend utilizing a local storage provisioner to unlock the benefits of NVMe storage available on the physical Kubernetes nodes. For more information, see the [Kubernetes Special Interest Group for Local Static Provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner). +* `service.sessionAffinity`: By default, the `service.sessionAffinity` is set to `false`. We currently recommend leaving this value as `false` unless you are using Flight, in which case you should consider the following factors: + * When the Flight client is being used and this value is set to `false`, there are cases where the `DoGet` call happens on a different TCP connection than the original `GetFlightInfo` call. + * For the Java Flight client, this happens when a different `ManagedChannel` is used for different `FlightClient` instances for different Dremio Users. + * For the Python Flight client, this happens when a different `FlightClient` is initialized for different Dremio Users. 
+ * In the cases described above, the `DoGet` call goes to a different coordinator than the one that originally created the query plan. + * This causes the query plan to be regenerated, which is less efficient than the case where both the `DoGet` and the `GetFlightInfo` calls go to the same coordinator. + * When `service.sessionAffinity` is set to `true`, all the TCP connections from a particular client IP will be routed to a specific Dremio coordinator. + +For users who wish to setup a Hive 2/3 source, please see the [Setup Hive 2 and 3](./Setup-Hive-2-and-3.md) documentation. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md b/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md new file mode 100644 index 00000000..6dcd5381 --- /dev/null +++ b/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md @@ -0,0 +1,71 @@ +# Migrating Helm Chart Versions + +**⚠️ If the contents of your chart's `templates` directory has been modified, this guide may not cover the changes necessary to maintain your existing setup.** As new functionality has been added to the Helm chart, please check whether the new version of the chart allows you to express the same modifications that have been made to your templates directory. + +**Note**: Helm 2 is no longer supported in this Helm chart. + +1. First begin by overriding the values in `values.yaml` to reflect the original chart's values. + + ***Tip***: As a best practice, we recommend creating a `values.local.yaml` (or equivalently named file) that stores the values that you wish to override as part of your setup of Dremio. This allows you to quickly update to the latest version of the chart by copying the `values.local.yaml` across Helm chart updates. + + In the new version of the Dremio Helm chart, changes have been introduced to the `values.yaml` file that differ from the original Dremio Helm chart. 
The chart below maps existing values and shows their equivalents in this Helm chart's `values.yaml`. + +| Original Value | New Value | +| ------------------------------------- | ------------------------------------------------------------ | +| `executor.cloudCache.quota.fs_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `executor.cloudCache.quota.db_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `executor.cloudCache.quota.cache_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `tls.ui.enabled` | `coordinator.ui.tls.enabled` | +| `tls.ui.secret` | `coordinator.ui.tls.secret` | +| `tls.client.enabled` | `coordinator.client.tls.enabled` | +| `tls.client.secret` | `coordinator.client.tls.secret` | +| `serviceType` | `service.type` | +| `sessionAffinity` | `service.sessionAffinity` | +| `internalLoadBalancer` | `service.internalLoadBalancer` | +| `imagePullSecrets` | `imagePullSecrets` is no longer a string-based value. This is now an array, which can be represented as follows: `imagePullSecrets: ["original-value"]`. | +| `distStorage.aws.accessKey` | `distStorage.aws.credentials.accessKey` ***Note***: If using access key and secret for authentication, the value of `distStorage.aws.authentication` must also be set to `accesskeySecret`. | +| `distStorage.aws.secret` | `distStorage.aws.credentials.secret` ***Note***: If using access key and secret for authentication, the value of `distStorage.aws.authentication` must also be set to `accesskeySecret`. | +| `distStorage.azure.applicationId` | `distStorage.azure.credentials.applicationId` | +| `distStorage.azure.secret` | `distStorage.azure.credentials.secret` | +| `distStorage.azure.oauth2EndPoint` | `distStorage.azure.credentials.oauth2Endpoint` ***Note***: The capitalization has changed in this value from `EndPoint` to `Endpoint`. 
| +| `distStorage.azureStorage.accessKey` | `distStorage.azureStorage.credentials.accessKey` | + +2. This chart introduces the concept of engines. Engines operate as a grouping of executor nodes that can be targeted via queues to handle specific workloads. + + As part of the transition to this Helm chart, to retain the existing persistent volumes used for the executor nodes, ensure that you keep a `default` engine as provided by the `values.yaml`. Additionally, set the value of `executor.engineOverride.default.volumeClaimName` to be `dremio-executor-volume`. + + For example, you would want to do the following to setup the `volumeClaimName`: + +```yaml +executor: + [...] + engineOverride: + default: + volumeClaimName: dremio-executor-volume +``` + +3. **With any upgrade, we recommend creating a backup first before performing an upgrade.** + + See the [Dremio Administration](../administration/Dremio-Administration.md) documentation on how to access `dremio-admin` commands to complete a backup prior to upgrading. + +4. We are now ready to begin upgrading to the new Helm chart. Begin by uninstalling the existing Helm chart for Dremio by using the `helm` command. + + Note: The data will persist in the persistent volumes through this process. + +```bash +$ helm uninstall +``` + +If your original deployment of the Dremio chart was with Helm 2, uninstall the Helm chart for Dremio by using the following command with the Helm 2 binary: + +```bash +$ helm2 delete --purge +``` + +5. Now, invoke `helm` again to install the new version of the chart: + +```bash +$ helm install dremio_v2 -f values.local.yaml +``` + +6. **Done!** Verify that the upgrade was successful. 
\ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md b/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md new file mode 100644 index 00000000..8f34f0ba --- /dev/null +++ b/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md @@ -0,0 +1,9 @@ +# Setup Hive 2 and 3 + +To setup Hive 2/3 in the Helm chart, locate the respective `config/hive2` or `config/hive3` directory to copy your necessary configuration files for Hive, i.e. `core-site.xml`. + +To update the configuration files in the pods, run the Helm upgrade command: + +```bash +$ helm upgrade dremio_v2 -f values.local.yaml +``` \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_coordinator.tpl b/charts/dremio_v2/templates/_helpers_coordinator.tpl new file mode 100644 index 00000000..81cd5d29 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_coordinator.tpl @@ -0,0 +1,153 @@ +{{/* +Coordinator - Dremio Heap Memory Allocation: fails when coordinator.memory is below 4096 (the 4 GB minimum); emits 16384 when memory is at least 18432, otherwise memory minus 2048. +*/}} +{{- define "dremio.coordinator.heapMemory" -}} +{{- $coordinatorMemory := int $.Values.coordinator.memory -}} +{{- if gt 4096 $coordinatorMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." }} +{{- end -}} +{{- if le 18432 $coordinatorMemory -}} +16384 +{{- else -}} +{{- sub $coordinatorMemory 2048}} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Dremio Direct Memory Allocation: fails when coordinator.memory is below 4096 (the 4 GB minimum); emits memory minus 16384 when memory is at least 18432, otherwise 2048. +*/}} +{{- define "dremio.coordinator.directMemory" -}} +{{- $coordinatorMemory := int $.Values.coordinator.memory -}} +{{- if gt 4096 $coordinatorMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." 
}} +{{- end -}} +{{- if le 18432 $coordinatorMemory -}} +{{- sub $coordinatorMemory 16384 -}} +{{- else -}} +2048 +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Service Account (coordinator.serviceAccount, else the top-level serviceAccount; emits nothing when neither is set) +*/}} +{{- define "dremio.coordinator.serviceAccount" -}} +{{- $coordinatorServiceAccount := coalesce $.Values.coordinator.serviceAccount $.Values.serviceAccount -}} +{{- if $coordinatorServiceAccount -}} +serviceAccountName: {{ $coordinatorServiceAccount }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Dremio Start Parameters (coordinator.extraStartParams, else the top-level extraStartParams; printed with a trailing space) +*/}} +{{- define "dremio.coordinator.extraStartParams" -}} +{{- $coordinatorExtraStartParams := coalesce $.Values.coordinator.extraStartParams $.Values.extraStartParams -}} +{{- if $coordinatorExtraStartParams}} +{{- printf "%v " $coordinatorExtraStartParams -}} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Init Containers (coordinator.extraInitContainers, else the top-level extraInitContainers; the value is rendered through tpl) +*/}} +{{- define "dremio.coordinator.extraInitContainers" -}} +{{- $coordinatorExtraInitContainers := coalesce $.Values.coordinator.extraInitContainers $.Values.extraInitContainers -}} +{{- if $coordinatorExtraInitContainers -}} +{{ tpl $coordinatorExtraInitContainers $ }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Volume Mounts (coordinator.extraVolumeMounts, else the top-level extraVolumeMounts; uses nested default instead of coalesce) +*/}} +{{- define "dremio.coordinator.extraVolumeMounts" -}} +{{- $coordinatorExtraVolumeMounts := default (default (dict) $.Values.extraVolumeMounts) $.Values.coordinator.extraVolumeMounts -}} +{{- if $coordinatorExtraVolumeMounts -}} +{{ toYaml $coordinatorExtraVolumeMounts }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Volumes (coordinator.extraVolumes, else the top-level 
extraVolumes) +*/}} +{{- define "dremio.coordinator.extraVolumes" -}} +{{- $coordinatorExtraVolumes := coalesce $.Values.coordinator.extraVolumes $.Values.extraVolumes -}} +{{- if $coordinatorExtraVolumes -}} +{{ toYaml $coordinatorExtraVolumes }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Storage Class (coordinator.storageClass, else the top-level storageClass; emits nothing when neither is set) +*/}} +{{- define "dremio.coordinator.storageClass" -}} +{{- $coordinatorStorageClass := coalesce $.Values.coordinator.storageClass $.Values.storageClass -}} 
+{{- if $coordinatorStorageClass -}} +storageClassName: {{ $coordinatorStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - StatefulSet Annotations (coordinator.annotations, else the top-level annotations) +*/}} +{{- define "dremio.coordinator.annotations" -}} +{{- $coordinatorAnnotations := coalesce $.Values.coordinator.annotations $.Values.annotations -}} +{{- if $coordinatorAnnotations -}} +annotations: + {{- toYaml $coordinatorAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - StatefulSet Labels (coordinator.labels, else the top-level labels) +*/}} +{{- define "dremio.coordinator.labels" -}} +{{- $coordinatorLabels := coalesce $.Values.coordinator.labels $.Values.labels -}} +{{- if $coordinatorLabels -}} +labels: + {{- toYaml $coordinatorLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Annotations (coordinator.podAnnotations, else the top-level podAnnotations; emitted without a parent key) +*/}} +{{- define "dremio.coordinator.podAnnotations" -}} +{{- $coordiantorPodAnnotations := coalesce $.Values.coordinator.podAnnotations $.Values.podAnnotations -}} +{{- if $coordiantorPodAnnotations -}} +{{ toYaml $coordiantorPodAnnotations }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Labels (coordinator.podLabels, else the top-level podLabels; emitted without a parent key) +*/}} +{{- define "dremio.coordinator.podLabels" -}} +{{- $coordinatorPodLabels := coalesce $.Values.coordinator.podLabels $.Values.podLabels -}} +{{- if $coordinatorPodLabels -}} +{{ toYaml $coordinatorPodLabels }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Node Selectors (coordinator.nodeSelector, else the top-level nodeSelector) +*/}} +{{- define "dremio.coordinator.nodeSelector" -}} +{{- $coordinatorNodeSelector := coalesce $.Values.coordinator.nodeSelector $.Values.nodeSelector -}} +{{- if $coordinatorNodeSelector -}} +nodeSelector: + {{- toYaml $coordinatorNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* 
+Coordinator - Pod Tolerations (coordinator.tolerations, else the top-level tolerations) +*/}} +{{- define "dremio.coordinator.tolerations" -}} +{{- $coordinatorTolerations := coalesce $.Values.coordinator.tolerations $.Values.tolerations -}} +{{- if $coordinatorTolerations -}} +tolerations: + {{- toYaml $coordinatorTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file 
diff --git a/charts/dremio_v2/templates/_helpers_executor.tpl b/charts/dremio_v2/templates/_helpers_executor.tpl new file mode 100644 index 00000000..d8ae6d21 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_executor.tpl @@ -0,0 +1,332 @@ + +{{/* +Executor - Dremio Heap Memory Allocation +*/}} +{{- define "dremio.executor.heapMemory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := int (default $context.Values.executor.memory $engineConfiguration.memory) -}} +{{- if gt 4096 $engineMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." }} +{{- end -}} +{{- if le 32786 $engineMemory -}} +8192 +{{- else if le 6144 $engineMemory -}} +4096 +{{- else -}} +2048 +{{- end -}} +{{- end -}} + +{{/* +Executor - Dremio Direct Memory Allocation +*/}} +{{- define "dremio.executor.directMemory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := int (default $context.Values.executor.memory $engineConfiguration.memory) -}} +{{- if gt 4096 $engineMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." }} +{{- end -}} +{{- if le 32786 $engineMemory -}} +{{- sub $engineMemory 8192 -}} +{{- else if le 6144 $engineMemory -}} +{{- sub $engineMemory 6144 -}} +{{- else -}} +{{- sub $engineMemory 2048 -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - CPU Resource Request +*/}} +{{- define "dremio.executor.cpu" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCpu := default ($context.Values.executor.cpu) $engineConfiguration.cpu -}} +{{- $engineCpu -}} +{{- end -}} + +{{/* +Executor - Memory Resource Request +*/}} +{{- define "dremio.executor.memory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := default ($context.Values.executor.memory) $engineConfiguration.memory -}} +{{- $engineMemory -}}M +{{- end -}} + +{{/* +Executor - Replication Count +*/}} +{{- define "dremio.executor.count" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCount := default ($context.Values.executor.count) $engineConfiguration.count -}} +{{- $engineCount -}} +{{- end -}} + +{{/* +Executor - ConfigMap +*/}} +{{- define "dremio.executor.config" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- if hasKey (default (dict) $context.Values.executor.engineOverride) $engineName -}} +{{- printf "dremio-config-%v" $engineName -}} +{{- else -}} +dremio-config +{{- end -}} +{{- end -}} + +{{/* +Executor - Service Account +*/}} +{{- define "dremio.executor.serviceAccount" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineServiceAccount := coalesce $engineConfiguration.serviceAccount $context.Values.executor.serviceAccount $context.Values.serviceAccount -}} +{{- if $engineServiceAccount -}} +serviceAccountName: {{ $engineServiceAccount }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Dremio Start Parameters +*/}} +{{- define "dremio.executor.extraStartParams" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraStartParams := coalesce $engineConfiguration.extraStartParams $context.Values.executor.extraStartParams $context.Values.extraStartParams -}} +{{- if $engineExtraStartParams}} +{{- printf "%v " $engineExtraStartParams -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Init Containers +*/}} +{{- define "dremio.executor.extraInitContainers" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraInitContainers := coalesce $engineConfiguration.extraInitContainers $context.Values.executor.extraInitContainers $context.Values.extraInitContainers -}} +{{- if $engineExtraInitContainers -}} +{{ tpl $engineExtraInitContainers $context }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Volume Mounts +*/}} +{{- define "dremio.executor.extraVolumeMounts" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraVolumeMounts := coalesce $engineConfiguration.extraVolumeMounts $context.Values.executor.extraVolumeMounts $context.Values.extraVolumeMounts -}} +{{- if $engineExtraVolumeMounts -}} +{{ toYaml $engineExtraVolumeMounts }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Volume Mounts +*/}} +{{- define "dremio.executor.extraVolumes" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraVolumes := coalesce $engineConfiguration.extraVolumes $context.Values.executor.extraVolumes $context.Values.extraVolumes -}} +{{- if $engineExtraVolumes -}} +{{ toYaml $engineExtraVolumes }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Storage Class +*/}} +{{- define "dremio.executor.storageClass" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineStorageClass := coalesce $engineConfiguration.storageClass $context.Values.executor.storageClass $context.Values.storageClass -}} +{{- if $engineStorageClass -}} +storageClassName: {{ $engineStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Peristent Volume Claims +*/}} +{{- define "dremio.executor.cloudCache.volumeClaimTemplate" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCloudCacheConfig := default (dict) $engineConfiguration.cloudCache -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- $cloudCacheStorageClass := coalesce $engineCloudCacheConfig.storageClass $context.Values.executor.cloudCache.storageClass $engineConfiguration.storageClass $context.Values.executor.storageClass $context.Values.storageClass -}} +{{- if $cloudCacheConfig.enabled -}} +{{- range $index, $cloudCacheVolumeConfig := $cloudCacheConfig.volumes }} +{{- $volumeStorageClass := coalesce $cloudCacheVolumeConfig.storageClass $cloudCacheStorageClass }} +- metadata: + name: {{ coalesce $cloudCacheVolumeConfig.name (printf "dremio-%s-executor-c3-%d" $engineName $index) }} + spec: + accessModes: ["ReadWriteOnce"] + {{- if $volumeStorageClass }} + storageClassName: {{ $volumeStorageClass }} + {{- end }} + resources: + requests: + storage: {{ $cloudCacheVolumeConfig.size }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Persistent Volume Mounts +*/}} +{{- define "dremio.executor.cloudCache.volumeMounts" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- if $cloudCacheConfig.enabled -}} +{{- range $index, $cloudCacheVolumeConfig := $cloudCacheConfig.volumes }} +- name: {{ coalesce $cloudCacheVolumeConfig.name (printf "dremio-%s-executor-c3-%d" $engineName $index) }} + mountPath: /opt/dremio/cloudcache/c{{ $index }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Init Containers +*/}} +{{- define "dremio.executor.cloudCache.initContainers" -}} +{{- $context := index . 
0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- if $cloudCacheConfig.enabled -}} +- name: chown-cloudcache-directory + image: {{ $context.Values.image }}:{{ $context.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + {{- include "dremio.executor.cloudCache.volumeMounts" (list $context $engineName) | nindent 2 }} + command: ["bash"] + args: ["-c", "chown dremio:dremio /opt/dremio/cloudcache/c*"] +{{- end -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Size +*/}} +{{- define "dremio.executor.volumeSize" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineVolumeSize := default ($context.Values.executor.volumeSize) $engineConfiguration.volumeSize -}} +{{- $engineVolumeSize -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Name +*/}} +{{- define "dremio.executor.volumeClaimName" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $volumeClaimName := default (printf "dremio-%v-executor-volume" $engineName) $engineConfiguration.volumeClaimName -}} +{{- $volumeClaimName -}} +{{- end -}} + +{{/* +Executor - StatefulSet Annotations +*/}} +{{- define "dremio.executor.annotations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineAnnotations := coalesce $engineConfiguration.annotations $context.Values.executor.annotations $context.Values.annotations -}} +{{- if $engineAnnotations -}} +annotations: + {{- toYaml $engineAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - StatefulSet Labels +*/}} +{{- define "dremio.executor.labels" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineLabels := coalesce $engineConfiguration.labels $context.Values.executor.labels $context.Values.labels -}} +{{- if $engineLabels -}} +labels: + {{- toYaml $engineLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Annotations +*/}} +{{- define "dremio.executor.podAnnotations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $enginePodAnnotations := coalesce $engineConfiguration.podAnnotations $context.Values.executor.podAnnotations $context.Values.podAnnotations -}} +{{- if $enginePodAnnotations -}} +{{ toYaml $enginePodAnnotations }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Labels +*/}} +{{- define "dremio.executor.podLabels" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $enginePodLabels := coalesce $engineConfiguration.podLabels $context.Values.executor.podLabels $context.Values.podLabels -}} +{{- if $enginePodLabels -}} +{{ toYaml $enginePodLabels }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Node Selectors +*/}} +{{- define "dremio.executor.nodeSelector" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineNodeSelector := coalesce $engineConfiguration.nodeSelector $context.Values.executor.nodeSelector $context.Values.nodeSelector -}} +{{- if $engineNodeSelector -}} +nodeSelector: + {{- toYaml $engineNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Tolerations +*/}} +{{- define "dremio.executor.tolerations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineTolerations := coalesce $engineConfiguration.tolerations $context.Values.executor.tolerations $context.Values.tolerations -}} +{{- if $engineTolerations -}} +tolerations: + {{- toYaml $engineTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_general.tpl b/charts/dremio_v2/templates/_helpers_general.tpl new file mode 100644 index 00000000..9d5f2c55 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_general.tpl @@ -0,0 +1,86 @@ +{{/* +Shared - Image Pull Secrets +*/}} +{{- define "dremio.imagePullSecrets" -}} +{{- if $.Values.imagePullSecrets }} +imagePullSecrets: +{{- range $secretName := $.Values.imagePullSecrets }} +- name: {{ $secretName }} +{{- end}} +{{- end -}} +{{- end -}} + +{{/* +Service - Annotations +*/}} +{{- define "dremio.service.annotations" -}} +{{- $serviceAnnotations := coalesce $.Values.service.annotations $.Values.annotations -}} +{{- if $.Values.service.internalLoadBalancer }} +annotations: + service.beta.kubernetes.io/azure-load-balancer-internal: "true" + cloud.google.com/load-balancer-type: "Internal" + service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0 + {{- if $serviceAnnotations -}} + {{- toYaml $serviceAnnotations | nindent 2 -}} + {{- end -}} +{{- else -}} +{{ if $serviceAnnotations }} +annotations: + {{- toYaml $serviceAnnotations | nindent 4 -}} +{{- end -}} +{{- end }} +{{- end -}} + +{{/* +Service - Labels +*/}} +{{- define "dremio.service.labels" -}} +{{- $serviceLabels := coalesce $.Values.service.labels $.Values.labels -}} +{{- if $serviceLabels -}} +{{- toYaml $serviceLabels }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Annotations +*/}} +{{- define "dremio.admin.podAnnotations" -}} +{{- $adminPodAnnotations := coalesce $.Values.coordinator.podAnnotations $.Values.podAnnotations -}} +{{- if 
$adminPodAnnotations -}} +annotations: + {{- toYaml $adminPodAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Labels +*/}} +{{- define "dremio.admin.podLabels" -}} +{{- $adminPodLabels := coalesce $.Values.coordinator.podLabels $.Values.podLabels -}} +{{- if $adminPodLabels -}} +labels: + {{- toYaml $adminPodLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Node Selectors +*/}} +{{- define "dremio.admin.nodeSelector" -}} +{{- $adminNodeSelector := coalesce $.Values.coordinator.nodeSelector $.Values.nodeSelector -}} +{{- if $adminNodeSelector -}} +nodeSelector: + {{- toYaml $adminNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Tolerations +*/}} +{{- define "dremio.admin.tolerations" -}} +{{- $adminPodTolerations := coalesce $.Values.coordinator.tolerations $.Values.tolerations -}} +{{- if $adminPodTolerations -}} +tolerations: + {{- toYaml $adminPodTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_zookeeper.tpl b/charts/dremio_v2/templates/_helpers_zookeeper.tpl new file mode 100644 index 00000000..f3774a00 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_zookeeper.tpl @@ -0,0 +1,92 @@ +{{/* +Zookeeper - Memory Calculation +*/}} +{{- define "dremio.zookeeper.memory" -}} +{{- $heapMemory := sub (int $.Values.zookeeper.memory) 100 -}} +{{- $heapMemory -}} +{{- end -}} + +{{/* +Zookeeper - Storage Class +*/}} +{{- define "dremio.zookeeper.storageClass" -}} +{{- $zookeeperStorageClass := coalesce $.Values.zookeeper.storageClass $.Values.storageClass -}} +{{- if $zookeeperStorageClass -}} +storageClassName: {{ $zookeeperStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Service Account +*/}} +{{- define "dremio.zookeeper.serviceAccount" -}} +{{- $zookeeperServiceAccount := coalesce $.Values.zookeeper.serviceAccount $.Values.serviceAccount -}} +{{- if $zookeeperServiceAccount -}} +serviceAccountName: {{ 
$zookeeperServiceAccount }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - StatefulSet Annotations +*/}} +{{- define "dremio.zookeeper.annotations" -}} +{{- $zookeeperAnnotations := coalesce $.Values.zookeeper.annotations $.Values.annotations -}} +{{- if $zookeeperAnnotations -}} +annotations: + {{- toYaml $zookeeperAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - StatefulSet Labels +*/}} +{{- define "dremio.zookeeper.labels" -}} +{{- $zookeeperLabels := coalesce $.Values.zookeeper.labels $.Values.labels -}} +{{- if $zookeeperLabels -}} +labels: + {{- toYaml $zookeeperLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Annotations +*/}} +{{- define "dremio.zookeeper.podAnnotations" -}} +{{- $coordiantorAnnotations := coalesce $.Values.zookeeper.podAnnotations $.Values.podAnnotations -}} +{{- if $coordiantorAnnotations -}} +annotations: + {{- toYaml $coordiantorAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Labels +*/}} +{{- define "dremio.zookeeper.podLabels" -}} +{{- $zookeeperLabels := coalesce $.Values.zookeeper.podLabels $.Values.podLabels -}} +{{- if $zookeeperLabels -}} +{{ toYaml $zookeeperLabels }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Node Selectors +*/}} +{{- define "dremio.zookeeper.nodeSelector" -}} +{{- $zookeeperNodeSelector := coalesce $.Values.zookeeper.nodeSelector $.Values.nodeSelector -}} +{{- if $zookeeperNodeSelector -}} +nodeSelector: + {{- toYaml $zookeeperNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Tolerations +*/}} +{{- define "dremio.zookeeper.tolerations" -}} +{{- $zookeeperTolerations := coalesce $.Values.zookeeper.tolerations $.Values.tolerations -}} +{{- if $zookeeperTolerations -}} +tolerations: + {{- toYaml $zookeeperTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-admin.yaml b/charts/dremio_v2/templates/dremio-admin.yaml new file 
mode 100644 index 00000000..10de4317 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-admin.yaml @@ -0,0 +1,41 @@ +{{- if $.Values.DremioAdmin -}} +# dremio-admin pod is used to run offline commands like +# clean, restore or set-password against the Dremio cluster. +# The Dremio cluster should be shutdown before attempting to +# create the dremio-admin pod. +# You connect to the pod (kubectl exec -it dremio-admin -- bash), +# go to /opt/dremio/bin and run dremio-admin commands as documented. +apiVersion: v1 +kind: Pod +metadata: + name: dremio-admin + {{- include "dremio.admin.podAnnotations" $ | nindent 2}} + {{- include "dremio.admin.podLabels" $ | nindent 2}} +spec: + containers: + - name: dremio-admin + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + stdin: true + tty: true + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["sleep", "infinity"] + {{- include "dremio.imagePullSecrets" $ | nindent 2 }} + {{- include "dremio.admin.nodeSelector" $ | nindent 2 }} + {{- include "dremio.admin.tolerations" $ | nindent 2 }} + volumes: + - name: dremio-master-volume + persistentVolumeClaim: + claimName: dremio-master-volume-dremio-master-0 + - name: dremio-config + configMap: + name: dremio-config +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-configmap.yaml b/charts/dremio_v2/templates/dremio-configmap.yaml new file mode 100644 index 00000000..34fa67aa --- /dev/null +++ b/charts/dremio_v2/templates/dremio-configmap.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-config +data: + {{- tpl ($.Files.Glob "config/*").AsConfig . | nindent 2 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-hive2-config +data: + {{- tpl ($.Files.Glob "config/hive2/*").AsConfig . 
| nindent 2 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-hive3-config +data: + {{- tpl ($.Files.Glob "config/hive3/*").AsConfig . | nindent 2 }} +--- +{{- $originalValues := mustDeepCopy $.Values -}} +{{- range $engine := keys (default (dict) $.Values.executor.engineOverride) -}} +{{- if has $engine $.Values.executor.engines -}} +{{- $_ := mustMergeOverwrite $.Values $originalValues -}} +{{- $engineAwareConfig := mustDeepCopy $.Values -}} +{{- $_ := set $engineAwareConfig "currentEngine" $engine }} +{{- $_ := mustMergeOverwrite $engineAwareConfig.executor (get $.Values.executor.engineOverride $engine) -}} +{{- $_ := mustMergeOverwrite $.Values $engineAwareConfig -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-config-{{ $engine }} +data: + {{- tpl ($.Files.Glob "config/*").AsConfig $ | nindent 2 }} +--- +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-coordinator.yaml b/charts/dremio_v2/templates/dremio-coordinator.yaml new file mode 100644 index 00000000..dc5d8426 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-coordinator.yaml @@ -0,0 +1,179 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: dremio-coordinator + {{- include "dremio.coordinator.annotations" $ | nindent 2 }} + {{- include "dremio.coordinator.labels" $ | nindent 2 }} +spec: + serviceName: "dremio-cluster-pod" + replicas: {{ $.Values.coordinator.count }} + podManagementPolicy: "Parallel" + revisionHistoryLimit: 1 + selector: + matchLabels: + app: dremio-coordinator + template: + metadata: + labels: + app: dremio-coordinator + role: dremio-cluster-pod + {{- include "dremio.coordinator.podLabels" $ | nindent 8 }} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.coordinator.podAnnotations" $ | nindent 8 }} + spec: + {{- include "dremio.coordinator.serviceAccount" $ | nindent 6 }} + 
terminationGracePeriodSeconds: 120 + {{- include "dremio.coordinator.nodeSelector" $ | nindent 6 }} + {{- include "dremio.coordinator.tolerations" $ | nindent 6 }} + containers: + - name: dremio-coordinator + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} + {{- include "dremio.coordinator.extraVolumeMounts" $ | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.heapMemory" $ }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.directMemory" $ }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.coordinator.extraStartParams" $ | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=true + -Dservices.coordinator.master.enabled=false + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=false + -Dservices.conduit.port=45679 + - name: AWS_CREDENTIAL_PROFILES_FILE + value: "/opt/dremio/aws/credentials" + - name: AWS_SHARED_CREDENTIALS_FILE + value: "/opt/dremio/aws/credentials" + command: ["/opt/dremio/bin/dremio"] + args: ["start-fg"] + ports: + - containerPort: 31010 + name: client + - containerPort: 32010 + name: flight + - containerPort: 45678 + name: 
server-fabric + - containerPort: 45679 + name: server-conduit + readinessProbe: + httpGet: + path: / + {{- if $.Values.coordinator.web.tls.enabled }} + scheme: HTTPS + {{- end }} + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 + initContainers: + {{- include "dremio.coordinator.extraInitContainers" $ | nindent 6 }} + - name: wait-for-dremio-master + image: busybox + command: ["sh", "-c", "until nc -z dremio-client {{ $.Values.coordinator.web.port }} > /dev/null; do echo Waiting for Dremio master.; sleep 2; done;"] + {{- if $.Values.coordinator.web.tls.enabled }} + - name: generate-ui-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/ui.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: generate-client-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: generate-flight-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-flight + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", 
"-out", "/opt/dremio/tls/flight.pkcs12", "-passout", "pass:"] + {{- end }} + volumes: + - name: dremio-config + configMap: + name: dremio-config + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if $.Values.coordinator.web.tls.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ $.Values.coordinator.web.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ $.Values.coordinator.client.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: dremio-tls-secret-flight + secret: + secretName: {{ $.Values.coordinator.flight.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} + {{- include "dremio.imagePullSecrets" $ | nindent 6}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-executor.yaml b/charts/dremio_v2/templates/dremio-executor.yaml new file mode 100644 index 00000000..fffdc53c --- /dev/null +++ b/charts/dremio_v2/templates/dremio-executor.yaml @@ -0,0 +1,121 @@ +{{- if not $.Values.DremioAdmin -}} +{{- range $engineIndex, $engineName := $.Values.executor.engines -}} +{{- $executorName := ternary "dremio-executor" (printf "dremio-executor-%s" $engineName) (eq $engineName "default") -}} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $executorName }} + {{- include "dremio.executor.annotations" (list $ $engineName) | nindent 2}} + {{- 
include "dremio.executor.labels" (list $ $engineName) | nindent 2}} +spec: + serviceName: "dremio-cluster-pod" + replicas: {{ template "dremio.executor.count" (list $ $engineName) }} + podManagementPolicy: "Parallel" + revisionHistoryLimit: 1 + selector: + matchLabels: + app: {{ $executorName }} + template: + metadata: + labels: + app: {{ $executorName }} + role: dremio-cluster-pod + {{- include "dremio.executor.podLabels" (list $ $engineName) | nindent 8}} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.executor.podAnnotations" (list $ $engineName) | nindent 8}} + spec: + {{- include "dremio.executor.serviceAccount" (list $ $engineName) | nindent 6}} + terminationGracePeriodSeconds: 120 + {{- include "dremio.executor.nodeSelector" (list $ $engineName) | nindent 6}} + {{- include "dremio.executor.tolerations" (list $ $engineName) | nindent 6}} + containers: + - name: dremio-executor + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ template "dremio.executor.cpu" (list $ $engineName) }} + memory: {{ template "dremio.executor.memory" (list $ $engineName) }} + volumeMounts: + - name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- include "dremio.executor.cloudCache.volumeMounts" (list $ $engineName) | nindent 8 }} + {{- include "dremio.executor.extraVolumeMounts" (list $ $engineName) | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template 
"dremio.executor.heapMemory" (list $ $engineName) }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.executor.directMemory" (list $ $engineName) }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.executor.extraStartParams" (list $ $engineName) | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=false + -Dservices.coordinator.master.enabled=false + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=true + -Dservices.conduit.port=45679 + -Dservices.node-tag={{ $engineName }} + - name: AWS_CREDENTIAL_PROFILES_FILE + value: "/opt/dremio/aws/credentials" + - name: AWS_SHARED_CREDENTIALS_FILE + value: "/opt/dremio/aws/credentials" + command: ["/opt/dremio/bin/dremio"] + args: ["start-fg"] + ports: + - containerPort: 45678 + name: server-fabric + - containerPort: 45679 + name: server-conduit + initContainers: + {{- include "dremio.executor.extraInitContainers" (list $ $engineName) | nindent 6 }} + - name: chown-data-directory + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + - name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + mountPath: /opt/dremio/data + command: ["chown"] + args: ["dremio:dremio", "/opt/dremio/data"] + {{- include "dremio.executor.cloudCache.initContainers" (list $ $engineName) | nindent 6 }} + - name: wait-for-zookeeper + image: busybox + command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo Waiting for Zookeeper to be ready.; sleep 2; done;"] + volumes: + - name: dremio-config + configMap: + name: {{ template "dremio.executor.config" (list $ $engineName) }} + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- include "dremio.executor.extraVolumes" (list $ $engineName) | nindent 6 }} + {{- include 
"dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + spec: + accessModes: ["ReadWriteOnce"] + {{- include "dremio.executor.storageClass" (list $ $engineName) | nindent 6 }} + resources: + requests: + storage: {{ template "dremio.executor.volumeSize" (list $ $engineName) }} + {{- include "dremio.executor.cloudCache.volumeClaimTemplate" (list $ $engineName) | nindent 2 }} +{{ end -}} +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-master.yaml b/charts/dremio_v2/templates/dremio-master.yaml new file mode 100644 index 00000000..f2f9b792 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-master.yaml @@ -0,0 +1,228 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: dremio-master + {{- include "dremio.coordinator.annotations" $ | nindent 2}} + {{- include "dremio.coordinator.labels" $ | nindent 2}} +spec: + serviceName: "dremio-cluster-pod" + podManagementPolicy: "Parallel" + replicas: 1 + selector: + matchLabels: + app: dremio-coordinator + template: + metadata: + labels: + app: dremio-coordinator + role: dremio-cluster-pod + {{- include "dremio.coordinator.podLabels" $ | nindent 8}} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.coordinator.podAnnotations" $ | nindent 8}} + spec: + {{- include "dremio.coordinator.serviceAccount" $ | nindent 6 }} + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - dremio-coordinator + topologyKey: "kubernetes.io/hostname" + terminationGracePeriodSeconds: 120 + {{- include "dremio.coordinator.nodeSelector" $ | nindent 6 }} + {{- include "dremio.coordinator.tolerations" $ | nindent 6 }} + containers: + - name: dremio-master-coordinator + image: {{ $.Values.image }}:{{ 
$.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} + {{- include "dremio.coordinator.extraVolumeMounts" $ | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.heapMemory" $ }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.directMemory" $ }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.coordinator.extraStartParams" $ | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=true + -Dservices.coordinator.master.enabled=true + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=false + -Dservices.conduit.port=45679 + - name: AWS_CREDENTIAL_PROFILES_FILE + value: "/opt/dremio/aws/credentials" + - name: AWS_SHARED_CREDENTIALS_FILE + value: "/opt/dremio/aws/credentials" + command: ["/opt/dremio/bin/dremio"] + args: + - "start-fg" + ports: + - containerPort: 9047 + name: web + - containerPort: 31010 + name: client + - containerPort: 32010 + name: flight + - containerPort: 45678 + name: server-fabric + - containerPort: 45679 + name: server-conduit + readinessProbe: + httpGet: + path: / + {{- if $.Values.coordinator.web.tls.enabled }} + 
scheme: HTTPS + {{- end }} + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 + initContainers: + {{- include "dremio.coordinator.extraInitContainers" $ | nindent 6 }} + - name: start-only-one-dremio-master + image: busybox + command: ["sh", "-c", "INDEX=${HOSTNAME##*-}; if [ $INDEX -ne 0 ]; then echo Only one master should be running.; exit 1; fi; "] + - name: wait-for-zookeeper + image: busybox + command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo Waiting for Zookeeper to be ready.; sleep 2; done;"] + - name: chown-data-directory + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + command: ["chown"] + args: + - "dremio:dremio" + - "/opt/dremio/data" + - name: upgrade-task + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["/opt/dremio/bin/dremio-admin"] + args: + - "upgrade" + {{- if $.Values.coordinator.web.tls.enabled }} + - name: generate-ui-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/ui.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: generate-client-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", 
"-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: generate-flight-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-flight + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/flight.pkcs12", "-passout", "pass:"] + {{- end }} + volumes: + - name: dremio-config + configMap: + name: dremio-config + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if $.Values.coordinator.web.tls.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ $.Values.coordinator.web.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ $.Values.coordinator.client.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: dremio-tls-secret-flight + secret: + secretName: {{ $.Values.coordinator.flight.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} + {{- include "dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: dremio-master-volume + spec: 
+ accessModes: ["ReadWriteOnce"] + {{- include "dremio.coordinator.storageClass" $ | nindent 6 }} + resources: + requests: + storage: {{ $.Values.coordinator.volumeSize }} +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-service-client.yaml b/charts/dremio_v2/templates/dremio-service-client.yaml new file mode 100644 index 00000000..7de85817 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-service-client.yaml @@ -0,0 +1,41 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: v1 +kind: Service +metadata: + name: dremio-client + labels: + app: dremio-client + {{- include "dremio.service.labels" $ | nindent 4 }} + {{- include "dremio.service.annotations" $ | nindent 2 }} +spec: + ports: + - port: {{ $.Values.coordinator.client.port | default 31010 }} + targetPort: client + name: client + - port: {{ $.Values.coordinator.web.port | default 9047 }} + targetPort: web + name: web + - port: {{ $.Values.coordinator.flight.port | default 32010 }} + targetPort: flight + name: flight + selector: + app: dremio-coordinator + type: {{ $.Values.service.type }} + {{- if and (eq $.Values.service.type "LoadBalancer") $.Values.service.loadBalancerIP }} + loadBalancerIP: {{ $.Values.service.loadBalancerIP }} + {{- end -}} + {{- if $.Values.service.sessionAffinity }} + sessionAffinity: {{ $.Values.service.sessionAffinity }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: dremio-cluster-pod +spec: + ports: + - port: 9999 + clusterIP: None + selector: + role: dremio-cluster-pod +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/zookeeper.yaml b/charts/dremio_v2/templates/zookeeper.yaml new file mode 100644 index 00000000..f5c87b9b --- /dev/null +++ b/charts/dremio_v2/templates/zookeeper.yaml @@ -0,0 +1,139 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: v1 +kind: Service +metadata: + name: zk-hs + labels: + app: zk +spec: + ports: + - port: 2181 + name: client + - port: 2888 + name: server + - port: 3888 + name: 
leader-election + clusterIP: None + selector: + app: zk +--- +apiVersion: v1 +kind: Service +metadata: + name: zk-cs + labels: + app: zk +spec: + ports: + - port: 2181 + name: client + selector: + app: zk +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: zk-pdb +spec: + selector: + matchLabels: + app: zk + maxUnavailable: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: zk + {{- include "dremio.zookeeper.annotations" $ | nindent 2 }} + {{- include "dremio.zookeeper.labels" $ | nindent 2 }} +spec: + selector: + matchLabels: + app: zk + serviceName: zk-hs + replicas: {{ $.Values.zookeeper.count }} + updateStrategy: + type: RollingUpdate + podManagementPolicy: Parallel + template: + metadata: + labels: + app: zk + {{- include "dremio.zookeeper.podLabels" $ | nindent 8 }} + {{- include "dremio.zookeeper.podAnnotations" $ | nindent 6 }} + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - zk + topologyKey: "kubernetes.io/hostname" + {{- include "dremio.zookeeper.nodeSelector" $ | nindent 6 }} + {{- include "dremio.zookeeper.tolerations" $ | nindent 6 }} + containers: + - name: kubernetes-zookeeper + imagePullPolicy: Always + image: "{{ $.Values.zookeeper.image }}:{{ $.Values.zookeeper.imageTag }}" + resources: + requests: + cpu: {{ $.Values.zookeeper.cpu }} + memory: {{ $.Values.zookeeper.memory }}M + ports: + - containerPort: 2181 + name: client + - containerPort: 2888 + name: server + - containerPort: 3888 + name: leader-election + command: + - sh + - -c + - "start-zookeeper \ + --servers={{ $.Values.zookeeper.count }} \ + --data_dir=/var/lib/zookeeper/data \ + --data_log_dir=/var/lib/zookeeper/data/log \ + --conf_dir=/opt/zookeeper/conf \ + --client_port=2181 \ + --election_port=3888 \ + --server_port=2888 \ + --tick_time=2000 \ + --init_limit=10 \ + --sync_limit=5 \ + --heap={{- template 
"dremio.zookeeper.memory" $ -}}M \ + --max_client_cnxns=60 \ + --snap_retain_count=3 \ + --purge_interval=12 \ + --max_session_timeout=40000 \ + --min_session_timeout=4000 \ + --log_level=INFO" + readinessProbe: + exec: + command: ["sh", "-c", "zookeeper-ready 2181"] + initialDelaySeconds: 10 + timeoutSeconds: 5 + livenessProbe: + exec: + command: ["sh", "-c", "zookeeper-ready 2181"] + initialDelaySeconds: 10 + timeoutSeconds: 5 + volumeMounts: + - name: datadir + mountPath: /var/lib/zookeeper + securityContext: + runAsUser: 1000 + fsGroup: 1000 + {{- include "dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: datadir + spec: + accessModes: ["ReadWriteOnce"] + {{- include "dremio.zookeeper.storageClass" $ | nindent 6 }} + resources: + requests: + storage: {{ $.Values.zookeeper.volumeSize }} +{{- end -}} diff --git a/charts/dremio_v2/values.yaml b/charts/dremio_v2/values.yaml new file mode 100644 index 00000000..79112ecb --- /dev/null +++ b/charts/dremio_v2/values.yaml @@ -0,0 +1,471 @@ +# The Dremio image used in the cluster. +# +# It is *highly* recommended to update the version tag to +# the version that you are using. This will ensure that all +# the pods are using the same version of the software. +# +# Using latest will cause Dremio to potentially upgrade versions +# automatically during redeployments and may negatively impact +# the cluster. +image: dremio/dremio-oss +imageTag: latest + +# Annotations, labels, node selectors, and tolerations +# +# annotations: Annotations are applied to the StatefulSets that are deployed. +# podAnnotations: Pod annotations are applied to the pods that are deployed. +# labels: Labels operate much like annotations. +# podLabels: Labels that are applied to the pods that are deployed. +# nodeSelector: Target pods to nodes based on labels set on the nodes. 
For more +# information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# tolerations: Tolerations allow the negation of taints that have been applied to some set of nodes +# in the Kubernetes cluster so that pods can be scheduled on those tainted nodes. +annotations: {} +podAnnotations: {} +labels: {} +podLabels: {} +nodeSelector: {} +tolerations: [] + +# Dremio Coordinator +coordinator: + # CPU & Memory + # Memory allocated to each coordinator, expressed in MB. + # CPU allocated to each coordinator, expressed in CPU cores. + cpu: 15 + memory: 122800 + + # This count is used for slave coordinators only. + # The total number of coordinators will always be count + 1. + count: 0 + + # Coordinator data volume size (applies to the master coordinator only). + # In most managed Kubernetes environments (AKS, GKE, etc.), the size of the disk has a direct impact on + # the provisioned and maximum performance of the disk. + volumeSize: 128Gi + + # Kubernetes Service Account + # Uncomment below to use a custom Kubernetes service account for the coordinator. + #serviceAccount: "" + + # Uncomment the lines below to use a custom set of extra startup parameters for the coordinator. + #extraStartParams: >- + # -DsomeKey=someValue + + # Extra Init Containers + # Uncomment the below lines to use a custom set of extra init containers for the coordinator. + #extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + + # Extra Volumes + # Uncomment below to use a custom set of extra volumes for the coordinator. + #extraVolumes: [] + + # Extra Volume Mounts + # Uncomment below to use a custom set of extra volume mounts for the coordinator. + #extraVolumeMounts: [] + + # Uncomment this value to use a different storage class for the coordinator. 
+ #storageClass: + + # These values, when defined, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + + # Web UI + web: + port: 9047 + tls: + # To enable TLS for the web UI, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. + enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-ui + + # ODBC/JDBC Client + client: + port: 31010 + tls: + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. + enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-client + + # Flight Client + flight: + port: 32010 + tls: + # To enable TLS for the Flight endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. + enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-flight + +# Dremio Executor +executor: + # CPU & Memory + # Memory allocated to each executor, expressed in MB. + # CPU allocated to each executor, expressed in CPU cores. + cpu: 15 + memory: 122800 + + # Engines + # Engine names must be 47 characters or less and be lowercase alphanumeric characters or '-'. + # Note: The number of executor pods will be the length of the array below * count. + engines: ["default"] + count: 3 + + # Executor volume size.
+ volumeSize: 128Gi + + # Kubernetes Service Account + # Uncomment below to use a custom Kubernetes service account for executors. + #serviceAccount: "" + + # Uncomment the lines below to use a custom set of extra startup parameters for executors. + #extraStartParams: >- + # -DsomeKey=someValue + + # Extra Init Containers + # Uncomment the below lines to use a custom set of extra init containers for executors. + #extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + + # Extra Volumes + # Uncomment below to use a custom set of extra volumes for executors. + #extraVolumes: [] + + # Extra Volume Mounts + # Uncomment below to use a custom set of extra volume mounts for executors. + #extraVolumeMounts: [] + + # Uncomment this value to use a different storage class for executors. + #storageClass: + + # Dremio C3 + # Designed for use with NVMe storage devices, performance may be impacted when using + # persistent volume storage that resides far from the physical node. + cloudCache: + enabled: true + + # Uncomment this value to use a different storage class for C3. + #storageClass: + + # Volumes to use for C3, specify multiple volumes if there are more than one local + # NVMe disk that you would like to use for C3. + # + # The below example shows all valid options that can be provided for a volume. + # volumes: + # - name: "dremio-default-c3" + # size: 100Gi + # storageClass: "local-nvme" + volumes: + - size: 100Gi + + # These values, when defined and not empty, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + + # Engine Overrides + # + # The settings above are overridable on a per-engine basis. 
These + # values here will take precedence and *override* the configured values + # on a per-engine basis. Engine overrides are matched with the name in the above + # list of engines. + # + # Special per-engine parameters: + # volumeClaimName: For each engine, you can optionally specify a value for the volume claim name, + # this value must be unique to each engine or may cause unintended consequences. This value is + # primarily intended for transitioning an existing single engine to a multi-engine configuration + # where there may already have been existing persistent volumes. + # + # The below example shows all valid options that can be overridden on a per-engine basis. + # engineOverride: + # engineNameHere: + # cpu: 1 + # memory: 122800 + # + # count: 1 + # + # annotations: {} + # podAnnotations: {} + # labels: {} + # podLabels: {} + # nodeSelector: {} + # tolerations: [] + # + # serviceAccount: "" + # + # extraStartParams: >- + # -DsomeCustomKey=someCustomValue + # + # extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + # + # + # extraVolumes: [] + # extraVolumeMounts: [] + # + # volumeSize: 50Gi + # storageClass: managed-premium + # volumeClaimName: dremio-default-executor-volume + # + # cloudCache: + # enabled: true + # + # storageClass: "" + # + # volumes: + # - name: "default-c3" + # size: 100Gi + # storageClass: "" + +# Zookeeper +zookeeper: + # The Zookeeper image used in the cluster. + image: k8s.gcr.io/kubernetes-zookeeper + imageTag: 1.0-3.4.10 + + # CPU & Memory + # Memory allocated to each zookeeper, expressed in MB. + # CPU allocated to each zookeeper, expressed in CPU cores. + cpu: 0.5 + memory: 1024 + count: 3 + + volumeSize: 10Gi + + # Kubernetes Service Account + # Uncomment below to use a custom Kubernetes service account for Zookeeper. + #serviceAccount: "" + + # Uncomment this value to use a different storage class for Zookeeper. 
+ #storageClass: + + # These values, when defined, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + +# Control where uploaded files are stored for Dremio. +# For more information, see https://docs.dremio.com/deployment/distributed-storage.html +distStorage: + # The supported distributed storage types are: local, aws, azure, gcp, or azureStorage. + # + # local: Not recommended for production use. When using local, dist-caching is disabled. + # aws: AWS S3, additional parameters required, see "aws" section. + # azure: ADLS Gen 1, additional parameters required, see "azure" section. + # azureStorage: Azure Storage Gen2, additional parameters required, see "azureStorage" section. + # gcp: Google Cloud Storage, additional parameters required, see "gcp" section. + type: "local" + + # Google Cloud Storage + # + # bucketName: The name of the GCS bucket for distributed storage. + # path: The path, relative to the bucket, to create Dremio's directories. + # authentication: Valid types are: serviceAccountKeys or auto. + # - When using "auto" authentication, Dremio uses Google Application Default Credentials to + # authenticate. This is platform dependent and may not be available in all Kubernetes clusters. + # - Note: When using a GCS bucket on GKE, we recommend enabling Workload Identity and configuring + # a Kubernetes Service Account for Dremio with an associated workload identity that + # has access to the GCS bucket. + # credentials: If using serviceAccountKeys authentication, uncomment the credentials section below.
+ gcp: + bucketName: "GCS Bucket Name" + path: "/" + authentication: "auto" + + # If using serviceAccountKeys, uncomment the section below, referencing the values from + # the service account credentials JSON file that you generated: + # + #credentials: + # projectId: GCP Project ID that the Google Cloud Storage bucket belongs to. + # clientId: Client ID for the service account that has access to Google Cloud Storage bucket. + # clientEmail: Email for the service account that has access to Google Cloud Storage bucket. + # privateKeyId: Private key ID for the service account that has access to Google Cloud Storage bucket. + # privateKey: |- + # -----BEGIN PRIVATE KEY-----\n Replace me with full private key value. \n-----END PRIVATE KEY-----\n + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. + # + #extraProperties: | + # + # + # + # + + # AWS S3 + # For more details of S3 configuration, see https://docs.dremio.com/deployment/dist-store-config.html#amazon-s3 + # + # bucketName: The name of the S3 bucket for distributed storage. + # path: The path, relative to the bucket, to create Dremio's directories. + # authentication: Valid types are: accessKeySecret, instanceMetadata, or awsProfile. + # - Note: Instance metadata is only supported in AWS EKS and requires that the + # EKS worker node IAM role is configured with sufficient access rights. At this time, + # Dremio does not support using an K8s service account based IAM role. + # credentials: If using accessKeySecret authentication, uncomment the credentials section below. + aws: + bucketName: "AWS Bucket Name" + path: "/" + authentication: "metadata" + # If using accessKeySecret for authentication against S3, uncomment the lines below and use the values + # to configure the appropriate credentials. 
+ # + #credentials: + # accessKey: "AWS Access Key" + # secret: "AWS Secret" + # + # If using awsProfile for authentication against S3, uncomment the lines below and use the values + # to choose the appropriate profile. + # + #credentials: + # awsProfileName: "default" + # + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. + # + #extraProperties: | + # + # + # + # + + # Azure ADLS Gen 1 + # For more details of Azure ADLS Gen 1 storage configuration, see + # https://docs.dremio.com/deployment/dist-store-config.html#azure-data-lake-storage-gen1 + # + # datalakeStoreName: The ADLS Gen 1 + azure: + datalakeStoreName: "Azure DataLake Store Name" + path: "/" + credentials: + applicationId: "Azure Application ID" + secret: "Azure Application Secret" + oauth2Endpoint: "Azure OAuth2 Endpoint" + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. + # + #extraProperties: | + # + # + # + # + + # Azure Storage Gen2 + # For more details of Azure Storage Gen2 storage configuration, see + # https://docs.dremio.com/deployment/dist-store-config.html#azure-storage + # + # accountName: The name of the storage account. + # filesystem: The name of the blob container to use within the storage account. + # path: The path, relative to the filesystem, to create Dremio's directories. + # credentials: + azureStorage: + accountName: "Azure Storage Account Name" + filesystem: "Azure Storage Account Blob Container" + path: "/" + credentials: + accessKey: "Azure Storage Account Access Key" + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. 
+ # + #extraProperties: | + # + # + # + # + +# Dremio Start Parameters +# Uncomment the below lines to provide extra start parameters to be passed directly to Dremio during startup. +#extraStartParams: >- +# -DsomeKey=someValue + +# Extra Init Containers +# Uncomment the below lines to provide extra init containers to be run first. +#extraInitContainers: | +# - name: extra-init-container +# image: {{ $.Values.image }}:{{ $.Values.imageTag }} +# command: ["echo", "Hello World"] + +# Kubernetes Service Account +# Uncomment the below line to provide a Kubernetes service account that Dremio should run with. +#serviceAccount: "" + +# Extra Volumes +# Array to add extra volumes to all Dremio resources. +extraVolumes: [] + +# Extra Volume Mounts +# Array to add extra volume mounts to all Dremio resources, normally used in conjunction with extraVolumes. +extraVolumeMounts: [] + +# Dremio Service +# The dremio-client service exposes the service for access outside of the Kubernetes cluster. +service: + type: LoadBalancer + + # These values, when defined and not empty, override the provided shared annotations and labels. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #labels: {} + + # If the loadBalancer supports sessionAffinity and you have more than one coordinator, + # uncomment the below line to enable session affinity. + #sessionAffinity: ClientIP + + # Enable the following flag if you wish to route traffic through a shared VPC + # for the LoadBalancer's external IP. + # The chart is set up for internal IP support for AKS, EKS, GKE. + # For more information, see https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer + #internalLoadBalancer: true + + # If you have a static IP allocated for your load balancer, uncomment the following + # line and set the IP to provide the static IP used for the load balancer. + # Note: The service type must be set to LoadBalancer for this value to be used.
+ #loadBalancerIP: 0.0.0.0 + +# To use custom storage class, uncomment below. +# Otherwise the default storage class configured for your K8s cluster is used. +#storageClass: managed-premium + +# For private and protected docker image repository, you should store +# the credentials in a kubernetes secret and provide the secret name +# here. For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# imagePullSecrets: +# - secretname \ No newline at end of file diff --git a/utils/Dockerfile b/utils/Dockerfile new file mode 100644 index 00000000..da700d1d --- /dev/null +++ b/utils/Dockerfile @@ -0,0 +1,38 @@ +################################################################ +# Image with useful cloud tools installed: +# - aws cli +# - eksctl +# - azure cli +# - gcloud +# - kubectl +# - helm +# - git +# +# An image built with this is published in Dockerhub as +# dremio/cloud-tools +# +################################################################ +FROM centos + +ADD helm-init.sh /usr/local/bin + +RUN \ + yum install -y epel-release && \ + yum install -y which openssl git python-pip && \ + pip install --upgrade pip && \ + pip install awscli && \ + curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/latest_release/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && \ + mv /tmp/eksctl /usr/local/bin && \ + curl -s -o aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.13.7/2019-06-11/bin/linux/amd64/aws-iam-authenticator && \ + chmod +x ./aws-iam-authenticator && \ + mv aws-iam-authenticator /usr/local/bin && \ + curl -s -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \ + chmod +x kubectl && \ + mv kubectl /usr/local/bin && \ + curl -s -L https://git.io/get_helm.sh | bash && \ + rpm --import https://packages.microsoft.com/keys/microsoft.asc 
&& \ + sh -c 'echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" > /etc/yum.repos.d/azure-cli.repo' && \ + yum install -y azure-cli && \ + curl https://sdk.cloud.google.com | bash + +WORKDIR /root diff --git a/utils/helm-init.sh b/utils/helm-init.sh new file mode 100755 index 00000000..4029ff5d --- /dev/null +++ b/utils/helm-init.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +if ! command -v helm 2>&1 > /dev/null; then + echo "Helm not found. Installing helm..." + curl -L https://git.io/get_helm.sh | bash + if ! command -v helm 2>&1 > /dev/null; then + echo "Failed installation of Helm. Please check the script and debug. " + exit 1 + fi + echo "Helm successfully installed on your machine." +fi +kubectl create serviceaccount -n kube-system tiller +kubectl create clusterrolebinding tiller-binding --clusterrole=cluster-admin --serviceaccount kube-system:tiller +helm init --service-account tiller --wait