Skip to content

Commit 71ecb1d

Browse files
author
donaldh
authored
Merge pull request #100 from cgiraldo/containers
Support for non ipynb files in jupyter component
2 parents e9cb295 + 034f1a8 commit 71ecb1d

25 files changed

+406
-255
lines changed

Dockerfile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
FROM centos:7.6.1810
2+
3+
RUN yum install -y epel-release
4+
RUN yum install -y python36-devel python36-pip cyrus-sasl-devel cyrus-sasl-gssapi cyrus-sasl-plain libffi-devel openssl-devel gcc gcc-c++
5+
6+
COPY api/src/main/resources /deployment-manager/
7+
8+
WORKDIR /deployment-manager
9+
10+
RUN pip3 install -r requirements.txt
11+
12+
# GIT http server to serve notebooks to jupyterhub
13+
RUN yum install -y git nginx fcgiwrap && \
14+
git config --global user.email "[email protected]" && \
15+
git config --global user.name "pnda" && \
16+
mkdir -p /data/git-repos/ && \
17+
mkdir -p /data/stage/
18+
COPY docker/nginx.conf /etc/nginx/nginx.conf
19+
COPY docker/entrypoint.sh /entrypoint.sh
20+
# PNDA platform users must transition from Linux SO users to cloud-native. For now we add a pnda user to container images.
21+
RUN useradd pnda
22+
23+
ENTRYPOINT "/entrypoint.sh"
24+

Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
NAME=platform-deployment-manager
2+
VERSION=release5.0
3+
REGISTRY=pnda
4+
5+
build: ## Build the docker image
6+
docker build -t "$(NAME):$(VERSION)" -f Dockerfile .
7+
8+
upload: ## Upload image to registry
9+
docker tag "$(NAME):$(VERSION)" "$(REGISTRY)/$(NAME):$(VERSION)"
10+
docker push "$(REGISTRY)/$(NAME):$(VERSION)"
11+
12+
help: ## This help
13+
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
14+
15+
.PHONY: help
16+
.DEFAULT_GOAL := help

api/src/main/resources/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def main():
328328
package_registrar.HbasePackageRegistrar(
329329
config['environment']['hbase_thrift_server'],
330330
config['environment']['webhdfs_host'],
331-
'hdfs',
331+
config['environment']['webhdfs_user'],
332332
config['environment']['webhdfs_port'],
333333
config['config']['stage_root']),
334334
application_registrar.HbaseApplicationRegistrar(

api/src/main/resources/application_creator.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@ def __init__(self, config, environment, service):
4444
self._name_regex = re.compile('')
4545
self._hdfs_client = HDFS(environment['webhdfs_host'],
4646
environment['webhdfs_port'],
47-
'hdfs')
47+
environment['webhdfs_user'])
4848

4949
def assert_application_properties(self, override_properties, default_properties):
50-
for component_type, component_properties in default_properties.iteritems():
50+
for component_type, component_properties in default_properties.items():
5151
creator = self._load_creator(component_type)
5252
creator.assert_application_properties(override_properties.get(component_type, {}), component_properties)
5353

@@ -70,7 +70,7 @@ def create_application(self, package_data_path, package_metadata, application_na
7070
# component specific return data for destruction
7171
create_metadata = {}
7272
try:
73-
for component_type, components in package_metadata['component_types'].iteritems():
73+
for component_type, components in package_metadata['component_types'].items():
7474
creator = self._load_creator(component_type)
7575
result = creator.create_components(stage_path,
7676
application_name,
@@ -89,7 +89,7 @@ def destroy_application(self, application_name, application_create_data):
8989
logging.debug("destroy_application: %s %s", application_name, application_create_data)
9090

9191
app_hdfs_root = None
92-
for component_type, component_create_data in application_create_data.iteritems():
92+
for component_type, component_create_data in application_create_data.items():
9393
creator = self._load_creator(component_type)
9494
creator.destroy_components(application_name, component_create_data)
9595
if component_create_data and 'application_hdfs_root' in component_create_data[0]:
@@ -106,15 +106,15 @@ def start_application(self, application_name, application_create_data):
106106

107107
logging.debug("start_application: %s %s", application_name, application_create_data)
108108

109-
for component_type, component_create_data in application_create_data.iteritems():
109+
for component_type, component_create_data in application_create_data.items():
110110
creator = self._load_creator(component_type)
111111
creator.start_components(application_name, component_create_data)
112112

113113
def stop_application(self, application_name, application_create_data):
114114

115115
logging.debug("stop_application: %s %s", application_name, application_create_data)
116116

117-
for component_type, component_create_data in application_create_data.iteritems():
117+
for component_type, component_create_data in application_create_data.items():
118118
creator = self._load_creator(component_type)
119119
creator.stop_components(application_name, component_create_data)
120120

@@ -124,7 +124,7 @@ def validate_package(self, package_name, package_metadata):
124124

125125
result = {}
126126
self._validate_name(package_name, package_metadata)
127-
for component_type, component_metadata in package_metadata['component_types'].iteritems():
127+
for component_type, component_metadata in package_metadata['component_types'].items():
128128
creator = self._load_creator(component_type)
129129
validation_errors = creator.validate_components(component_metadata)
130130
if validation_errors:
@@ -152,7 +152,7 @@ def get_application_runtime_details(self, application_name, application_create_d
152152

153153
details = {}
154154
details['yarn_applications'] = {}
155-
for component_type, component_create_data in application_create_data.iteritems():
155+
for component_type, component_create_data in application_create_data.items():
156156
creator = self._load_creator(component_type)
157157
type_details = creator.get_component_runtime_details(component_create_data)
158158
details['yarn_applications'].update(type_details['yarn_applications'])

api/src/main/resources/application_detailed_summary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _do_generate():
7171
try:
7272
create_data = self._application_registrar.get_create_data(application)
7373
input_data = {}
74-
for component_name, component_data in create_data.iteritems():
74+
for component_name, component_data in create_data.items():
7575
input_data[component_name] = {}
7676
input_data[component_name]["component_ref"] = self._load_creator(component_name)
7777
input_data[component_name]["component_data"] = component_data

api/src/main/resources/application_registrar.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from Hbase_thrift import AlreadyExists
2727

2828
from lifecycle_states import ApplicationState
29+
from hbase_utils import encode,decode
2930

3031

3132
class HbaseApplicationRegistrar(object):
@@ -101,7 +102,7 @@ def list_applications(self):
101102
connection = happybase.Connection(self._hbase_host)
102103
try:
103104
table = connection.table(self._table_name)
104-
result = [key for key, data in table.scan(columns=['cf:status']) if data['cf:status'] != ApplicationState.NOTCREATED]
105+
result = [decode(key) for key, data in table.scan(columns=[b'cf:status']) if decode(data[b'cf:status']) != ApplicationState.NOTCREATED]
105106
finally:
106107
connection.close()
107108
return result
@@ -112,8 +113,8 @@ def list_applications_for_package(self, package_name):
112113
connection = happybase.Connection(self._hbase_host)
113114
try:
114115
table = connection.table(self._table_name)
115-
result = [key for key, data in table.scan(columns=['cf:package_name', 'cf:status'])
116-
if data['cf:package_name'] == package_name and data['cf:status'] != ApplicationState.NOTCREATED]
116+
result = [key for key, data in table.scan(columns=[b'cf:package_name', b'cf:status'])
117+
if decode(data[b'cf:package_name']) == package_name and decode(data[b'cf:status']) != ApplicationState.NOTCREATED]
117118
finally:
118119
connection.close()
119120
return result
@@ -131,15 +132,15 @@ def _read_from_db(self, key):
131132
connection = happybase.Connection(self._hbase_host)
132133
try:
133134
table = connection.table(self._table_name)
134-
data = table.row(key)
135+
data = table.row(encode(key))
135136
finally:
136137
connection.close()
137-
return data
138+
return decode(data)
138139

139140
def _write_to_db(self, key, data):
140141
connection = happybase.Connection(self._hbase_host)
141142
try:
142143
table = connection.table(self._table_name)
143-
table.put(key, data)
144+
table.put(encode(key), encode(data))
144145
finally:
145146
connection.close()

api/src/main/resources/application_summary_registrar.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,19 +85,19 @@ def get_dm_status(self, key):
8585
try:
8686
connection = happybase.Connection(self._hbase_host)
8787
table = connection.table("platform_applications")
88-
row = table.row(key, columns=['cf:status'])
89-
status = row['cf:status']
88+
row = table.row(key, columns=[b'cf:status'])
89+
status = row[b'cf:status']
9090
except TTransportException as error_message:
9191
logging.error(str(error_message))
9292
finally:
9393
connection.close()
94-
return status
94+
return status.decode()
9595

9696
def get_flink_job_id(self, key):
9797
jid = ''
9898
data = self._read_from_db(key)
9999
if data:
100-
data = json.loads(data['cf:component_data'])
100+
data = json.loads(data[b'cf:component_data'])
101101
for component in data:
102102
if 'flink' in component:
103103
tracking_url = data[component]['tracking_url']
@@ -111,9 +111,9 @@ def get_summary_data(self, application):
111111
summary_data = self._read_from_db(application)
112112
if summary_data:
113113
record[application].update({
114-
'aggregate_status': summary_data['cf:aggregate_status']
114+
'aggregate_status': summary_data[b'cf:aggregate_status']
115115
})
116-
record[application].update(json.loads(summary_data['cf:component_data']))
116+
record[application].update(json.loads(summary_data[b'cf:component_data']))
117117
else:
118118
record[application].update({
119119
'status': 'Not Available'

api/src/main/resources/authorizer_local.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def authorize(self, identity, resource, action):
5757
authorize = True
5858
logging.debug("authorize: %s for %s", authorize, grant_rule)
5959
break
60-
except KeyError, ex:
60+
except KeyError as ex:
6161
logging.warning("missing attribute %s", ex)
6262

6363
if not authorize:

api/src/main/resources/deployer_utils.py

Lines changed: 7 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import os
2424
import tarfile
25-
import StringIO
25+
from io import BytesIO
2626
import logging
2727
import traceback
2828
import time
@@ -32,18 +32,6 @@
3232
import spur
3333
from pywebhdfs.webhdfs import PyWebHdfsClient
3434

35-
from cm_api.api_client import ApiResource
36-
37-
38-
def connect_cm(cm_api, cm_username, cm_password):
39-
api = ApiResource(
40-
cm_api,
41-
version=6,
42-
username=cm_username,
43-
password=cm_password)
44-
return api
45-
46-
4735
def get_nameservice(cm_host, cluster_name, service_name, user_name='admin', password='admin'):
4836
request_url = 'http://%s:7180/api/v11/clusters/%s/services/%s/nameservices' % (cm_host,
4937
cluster_name,
@@ -66,9 +54,11 @@ def update_hadoop_env(env):
6654
tmp_env = dict(env)
6755
logging.debug('Updating environment descriptor')
6856
if env['hadoop_distro'] == 'CDH':
69-
fill_hadoop_env_cdh(tmp_env)
70-
else:
57+
logging.error('CDH is not a supported hadoop distribution')
58+
elif env['hadoop_distro'] == 'HDP':
7159
fill_hadoop_env_hdp(tmp_env)
60+
else:
61+
logging.warning('Skipping update_hadoop_env for hadoop distro "%s"', env['hadoop_distro'])
7262
logging.debug('Updated environment descriptor')
7363
for key in tmp_env:
7464
# Dictionary get/put operations are atomic so inherently thread safe and don't need a lock
@@ -182,105 +172,6 @@ def fill_hadoop_env_hdp(env):
182172
env['hive_server'] = '%s' % component_host(component_detail)
183173
env['hive_port'] = '10001'
184174

185-
def fill_hadoop_env_cdh(env):
186-
# pylint: disable=E1103
187-
api = connect_cm(
188-
env['hadoop_manager_host'],
189-
env['hadoop_manager_username'],
190-
env['hadoop_manager_password'])
191-
192-
for cluster_detail in api.get_all_clusters():
193-
cluster_name = cluster_detail.name
194-
break
195-
196-
logging.debug('getting %s', cluster_name)
197-
env['cm_status_links'] = {}
198-
env.pop('yarn_node_managers', None)
199-
env.pop('yarn_resource_manager_host', None)
200-
env.pop('zookeeper_quorum', None)
201-
202-
cluster = api.get_cluster(cluster_name)
203-
for service in cluster.get_all_services():
204-
env['cm_status_links']['%s' % service.name] = service.serviceUrl
205-
if service.type == "HDFS":
206-
nameservice = get_nameservice(env['hadoop_manager_host'], cluster_name,
207-
service.name,
208-
user_name=env['hadoop_manager_username'],
209-
password=env['hadoop_manager_password'])
210-
if nameservice:
211-
env['name_node'] = 'hdfs://%s' % nameservice
212-
for role in service.get_all_roles():
213-
if not nameservice and role.type == "NAMENODE":
214-
env['name_node'] = 'hdfs://%s:8020' % api.get_host(role.hostRef.hostId).hostname
215-
if role.type == "HTTPFS":
216-
env['webhdfs_host'] = '%s' % api.get_host(role.hostRef.hostId).hostname
217-
env['webhdfs_port'] = '14000'
218-
elif service.type == "YARN":
219-
for role in service.get_all_roles():
220-
if role.type == "RESOURCEMANAGER":
221-
if 'yarn_resource_manager_host' in env:
222-
rm_instance = '_backup'
223-
else:
224-
rm_instance = ''
225-
env['yarn_resource_manager_host%s' % rm_instance] = '%s' % api.get_host(role.hostRef.hostId).hostname
226-
env['yarn_resource_manager_port%s' % rm_instance] = '8088'
227-
env['yarn_resource_manager_mr_port%s' % rm_instance] = '8032'
228-
if role.type == "NODEMANAGER":
229-
if 'yarn_node_managers' in env:
230-
env['yarn_node_managers'] = '%s,%s' % (env['yarn_node_managers'], api.get_host(role.hostRef.hostId).hostname)
231-
else:
232-
env['yarn_node_managers'] = '%s' % api.get_host(
233-
role.hostRef.hostId).hostname
234-
elif service.type == "MAPREDUCE":
235-
for role in service.get_all_roles():
236-
if role.type == "JOBTRACKER":
237-
env['job_tracker'] = '%s:8021' % api.get_host(role.hostRef.hostId).hostname
238-
break
239-
elif service.type == "ZOOKEEPER":
240-
for role in service.get_all_roles():
241-
if role.type == "SERVER":
242-
if 'zookeeper_quorum' in env:
243-
env['zookeeper_quorum'] += ',%s' % api.get_host(role.hostRef.hostId).hostname
244-
else:
245-
env['zookeeper_quorum'] = '%s' % api.get_host(role.hostRef.hostId).hostname
246-
env['zookeeper_port'] = '2181'
247-
elif service.type == "HBASE":
248-
for role in service.get_all_roles():
249-
if role.type == "HBASERESTSERVER":
250-
env['hbase_rest_server'] = '%s' % api.get_host(role.hostRef.hostId).hostname
251-
env['hbase_rest_port'] = '20550'
252-
elif role.type == "HBASETHRIFTSERVER":
253-
env['hbase_thrift_server'] = '%s' % api.get_host(role.hostRef.hostId).hostname
254-
elif service.type == "OOZIE":
255-
for role in service.get_all_roles():
256-
if role.type == "OOZIE_SERVER":
257-
env['oozie_uri'] = 'http://%s:11000/oozie' % api.get_host(role.hostRef.hostId).hostname
258-
break
259-
elif service.type == "HIVE":
260-
for role in service.get_all_roles():
261-
if role.type == "HIVESERVER2":
262-
env['hive_server'] = '%s' % api.get_host(role.hostRef.hostId).hostname
263-
env['hive_port'] = '10000'
264-
break
265-
elif service.type == "IMPALA":
266-
for role in service.get_all_roles():
267-
if role.type == "IMPALAD":
268-
env['impala_host'] = '%s' % api.get_host(role.hostRef.hostId).hostname
269-
env['impala_port'] = '21050'
270-
break
271-
elif service.type == "KUDU":
272-
for role in service.get_all_roles():
273-
if role.type == "KUDU_MASTER":
274-
env['kudu_host'] = '%s' % api.get_host(role.hostRef.hostId).hostname
275-
env['kudu_port'] = '7051'
276-
break
277-
elif service.type == "HUE":
278-
for role in service.get_all_roles():
279-
if role.type == "HUE_SERVER":
280-
env['hue_host'] = '%s' % api.get_host(role.hostRef.hostId).hostname
281-
env['hue_port'] = '8888'
282-
break
283-
284175
def tree(archive_filepath):
285176
file_handle = file(archive_filepath, 'rb')
286177
tar_file = tarfile.open(None, 'r', file_handle)
@@ -297,7 +188,6 @@ def tree(archive_filepath):
297188

298189
return root
299190

300-
301191
def canonicalize(path):
302192
path = path.replace('\\', '/')
303193
path = path.replace('//', '/')
@@ -353,7 +243,7 @@ def create_file(self, data, remote_file_path, permission=755):
353243

354244
logging.debug('create_file: %s', remote_file_path)
355245

356-
sio = StringIO.StringIO(data)
246+
sio = BytesIO(data)
357247

358248
self._hdfs.create_file(
359249
canonicalize(remote_file_path),
@@ -420,7 +310,7 @@ def exec_ssh(host, user, key, ssh_commands):
420310

421311
def dict_to_props(dict_props):
422312
props = []
423-
for key, value in dict_props.iteritems():
313+
for key, value in dict_props.items():
424314
props.append('%s=%s' % (key, value))
425315
return '\n'.join(props)
426316

0 commit comments

Comments
 (0)