
Commit 4a0f929

Merge branch 'develop' into PNDA-4012
2 parents: 4f802d9 + 2e4352a

8 files changed (+252, -184 lines)

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -2,6 +2,10 @@
 All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
+### Changed
+- PNDA-4405: Require user and package fields in create application API
+- PNDA-4389: Reject packages containing upstart.conf files
+
 ### Fixed
 - PNDA-4218: Fix application deletion when app name is an HDFS username
 

api/src/main/resources/app.py

Lines changed: 10 additions & 3 deletions
@@ -256,13 +256,20 @@ class ApplicationHandler(BaseHandler):
     def put(self, aname):
         try:
             request_body = json.loads(self.request.body)
-            pname = request_body['package']
-        except:
+        except ValueError:
             self.send_client_error("Invalid request body")
             return
 
+        if 'package' not in request_body:
+            self.send_client_error("Invalid request body. Missing field 'package'")
+            return
+
+        if 'user' not in request_body:
+            self.send_client_error("Invalid request body. Missing field 'user'")
+            return
+
         def do_call():
-            dm.create_application(pname, aname, request_body)
+            dm.create_application(request_body['package'], aname, request_body)
             self.send_accepted()
 
         DISPATCHER.run_as_asynch(task=do_call, on_error=self.handle_error)

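As context for the change above, here is a minimal sketch of a create-application request that satisfies the new validation. The host, port, route and names are placeholders, not taken from this commit; only the requirement that the JSON body carry both 'package' and 'user' comes from the diff.

# Hypothetical client call; the deployment manager URL and the application
# and package names below are assumptions for illustration only.
import json
import requests

body = {
    'package': 'example-package-1.0.0',  # now rejected if missing
    'user': 'example-user'               # now rejected if missing
}
response = requests.put(
    'http://dm.example.com:5000/applications/example-app',
    data=json.dumps(body),
    headers={'Content-Type': 'application/json'})
print(response.status_code)
# Omitting either field now returns a client error such as
# "Invalid request body. Missing field 'user'" instead of being accepted.
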
api/src/main/resources/application_summary.py

Lines changed: 117 additions & 106 deletions
Large diffs are not rendered by default.

api/src/main/resources/deployer_utils.py

Lines changed: 1 addition & 1 deletion
@@ -213,7 +213,7 @@ def fill_hadoop_env_cdh(env):
                 if not nameservice and role.type == "NAMENODE":
                     env['name_node'] = 'hdfs://%s:8020' % api.get_host(role.hostRef.hostId).hostname
                 if role.type == "HTTPFS":
-                    env['webhdfs_host'] = '%s' % api.get_host(role.hostRef.hostId).ipAddress
+                    env['webhdfs_host'] = '%s' % api.get_host(role.hostRef.hostId).hostname
                     env['webhdfs_port'] = '14000'
         elif service.type == "YARN":
             for role in service.get_all_roles():

api/src/main/resources/plugins/oozie.py

Lines changed: 78 additions & 22 deletions
@@ -22,11 +22,14 @@
 
 # pylint: disable=C0103
 
+import os
 import json
 import logging
 import datetime
 import xml.etree.ElementTree as ElementTree
 import commands
+import shutil
+import traceback
 import requests
 
 import deployer_utils
@@ -111,41 +114,28 @@ def create_component(self, staged_component_path, application_name, user_name, c
         properties[def_path] = '%s/%s' % (self._environment['name_node'], remote_path)
 
         # deploy everything to various hadoop services
-        undeploy = self._deploy_to_hadoop(properties, staged_component_path, remote_path, properties['application_user'])
+        undeploy = self._deploy_to_hadoop(component, properties, staged_component_path, remote_path, properties['application_user'])
 
         # return something that can be used to undeploy later
         return {'job_handle': undeploy['id'],
                 'component_hdfs_root': properties['component_hdfs_root'],
                 'application_user': properties['application_user']}
 
-    def _deploy_to_hadoop(self, properties, staged_component_path, remote_path, application_user, exclude=None):
-        if exclude is None:
-            exclude = []
-        exclude.extend(['hdfs.json',
-                        'hbase.json',
-                        'properties.json',
-                        'application.properties'])
-
-        # stage the component files to hdfs
-        self._hdfs_client.recursive_copy(staged_component_path, remote_path, exclude=exclude, permission=755)
-
-        # stage the instantiated job properties back to HDFS - no functional purpose,
-        # just helps developers understand what has happened
-        effective_job_properties = deployer_utils.dict_to_props(properties)
-        self._hdfs_client.create_file(effective_job_properties, '%s/application.properties' % remote_path)
-
+    def _setup_queue_config(self, component, staged_component_path, properties):
        # Add queue config into the default config if none is defined.
        if 'mapreduce.job.queuename' in properties:
            defaults = {'mapreduce.job.queuename':properties['mapreduce.job.queuename']}
            try:
-                data = self._hdfs_client.read_file('%s/config-default.xml' % remote_path)
+                with open('%s/config-default.xml' % staged_component_path, 'r') as config_default_file:
+                    data = config_default_file.read()
            except:
                logging.debug('No config-default.xml is detected.')
                data = None
 
            if data is None:
                logging.debug('Creating config-default.xml to inject mapreduce.job.queuename property.')
-                self._hdfs_client.create_file(deployer_utils.dict_to_xml(defaults), '%s/config-default.xml' % remote_path)
+                with open('%s/config-default.xml' % staged_component_path, 'w') as config_default_file:
+                    config_default_file.write(deployer_utils.dict_to_xml(defaults))
            else:
                prop = None
                root = None
@@ -167,10 +157,76 @@ def _deploy_to_hadoop(self, properties, staged_component_path, remote_path, appl
                    logging.debug('adding mapred.queue.names in config-default.xml')
                    prop = ElementTree.SubElement(root, 'property')
                    ElementTree.SubElement(prop, 'name').text = 'mapreduce.job.queuename'
-                    ElementTree.SubElement(prop, 'value').text = 'dev'
+                    ElementTree.SubElement(prop, 'value').text = properties['mapreduce.job.queuename']
                data = ElementTree.tostring(root)
-                self._hdfs_client.remove('%s/config-default.xml' % remote_path)
-                self._hdfs_client.create_file(data, '%s/config-default.xml' % remote_path)
+                with open('%s/config-default.xml' % staged_component_path, 'w') as config_default_file:
+                    config_default_file.write(data)
+
+            file_list = [file_name for file_name in component['component_detail'] if os.path.isfile('%s/%s' % (staged_component_path, file_name))]
+            # find workflow.xml files
+            for afile in file_list:
+                workflow_modified = False
+                file_path = '%s/%s' % (staged_component_path, afile)
+                with open(file_path, 'r') as component_file:
+                    workflow_xml = component_file.read()
+                if 'uri:oozie:workflow' not in workflow_xml:
+                    continue
+                logging.debug("Found workflow file %s", file_path)
+                # copy config-default.xml into this directory
+                if os.path.dirname(file_path) != staged_component_path:
+                    shutil.copyfile('%s/config-default.xml' % staged_component_path, '%s/config-default.xml' % os.path.dirname(file_path))
+
+                # set the spark opts --queue so spark jobs are put in the right queue
+                spark_action_index = 0
+                while spark_action_index >= 0:
+                    spark_action_index = workflow_xml.find('<spark ', spark_action_index+1)
+                    spark_end_index = workflow_xml.find('</spark>', spark_action_index)
+                    jar_end_index = workflow_xml.find('</jar>', spark_action_index, spark_end_index)
+                    opts_index = workflow_xml.find('<spark-opts>', spark_action_index, spark_end_index)
+                    opts_end_index = workflow_xml.find('</spark-opts>', opts_index, spark_end_index)
+                    queue_opt_index = workflow_xml.find('--queue ', opts_index, opts_end_index)
+                    if jar_end_index >= 0:
+                        if opts_index < 0:
+                            # we need to add a spark-opts element
+                            split_index = jar_end_index+len('</jar>')
+                            workflow_xml = '%s%s%s' % (workflow_xml[:split_index],
+                                                       '<spark-opts>--queue ${wf:conf("mapreduce.job.queuename")}</spark-opts>',
+                                                       workflow_xml[split_index:])
+                            workflow_modified = True
+                        elif queue_opt_index < 0:
+                            # we need to add a queue opt to the existing spark-opts element
+                            split_index = opts_end_index
+                            workflow_xml = '%s%s%s' % (workflow_xml[:split_index], ' --queue ${wf:conf("mapreduce.job.queuename")}', workflow_xml[split_index:])
+                            workflow_modified = True
+
+                # write out modified workflow if changes were made
+                if workflow_modified:
+                    logging.debug("Writing out modified workflow xml to %s", file_path)
+                    with open(file_path, "w") as workflow_file:
+                        workflow_file.write(workflow_xml)
+
+    def _deploy_to_hadoop(self, component, properties, staged_component_path, remote_path, application_user, exclude=None):
+        if exclude is None:
+            exclude = []
+        exclude.extend(['hdfs.json',
+                        'hbase.json',
+                        'properties.json',
+                        'application.properties'])
+
+        # setup queue config
+        try:
+            self._setup_queue_config(component, staged_component_path, properties)
+        except Exception as ex:
+            logging.error(traceback.format_exc())
+            raise FailedCreation('Failed to set up yarn queue config: %s' % str(ex))
+
+        # stage the component files to hdfs
+        self._hdfs_client.recursive_copy(staged_component_path, remote_path, exclude=exclude, permission=755)
+
+        # stage the instantiated job properties back to HDFS - no functional purpose,
+        # just helps developers understand what has happened
+        effective_job_properties = deployer_utils.dict_to_props(properties)
+        self._hdfs_client.create_file(effective_job_properties, '%s/application.properties' % remote_path)
 
         # submit to oozie
         result = self._submit_oozie(properties)

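To make the workflow rewriting above easier to follow, the standalone sketch below applies the same string-splicing idea to a toy spark action: add a <spark-opts> element straight after </jar> when none exists, or append a --queue option to an existing one. The helper name and the sample XML are invented for illustration; they are not part of the deployer's API.

# Illustrative stand-in, not the plugin code: inject the YARN queue option
# into a single spark action, mirroring the splice logic in the diff above.
QUEUE_OPT = '--queue ${wf:conf("mapreduce.job.queuename")}'

def inject_queue(workflow_xml):
    spark_index = workflow_xml.find('<spark ')
    if spark_index < 0:
        return workflow_xml
    spark_end = workflow_xml.find('</spark>', spark_index)
    jar_end = workflow_xml.find('</jar>', spark_index, spark_end)
    opts_start = workflow_xml.find('<spark-opts>', spark_index, spark_end)
    if opts_start < 0 and jar_end >= 0:
        # no <spark-opts> element yet: add one straight after </jar>
        split = jar_end + len('</jar>')
        return workflow_xml[:split] + '<spark-opts>' + QUEUE_OPT + '</spark-opts>' + workflow_xml[split:]
    opts_end = workflow_xml.find('</spark-opts>', opts_start, spark_end)
    if opts_start >= 0 and opts_end >= 0 and '--queue ' not in workflow_xml[opts_start:opts_end]:
        # <spark-opts> exists but carries no --queue yet: append one
        return workflow_xml[:opts_end] + ' ' + QUEUE_OPT + workflow_xml[opts_end:]
    return workflow_xml

sample = ('<action name="run"><spark xmlns="uri:oozie:spark-action:0.1">'
          '<jar>app.jar</jar></spark></action>')
print(inject_queue(sample))
# The printed action now contains <spark-opts>--queue ...</spark-opts>
# immediately after </jar>, which is the effect the deployed workflows get.
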
api/src/main/resources/plugins/sparkStreaming.py

Lines changed: 8 additions & 3 deletions
@@ -40,6 +40,10 @@ def validate_component(self, component):
            errors.append('missing file application.properties')
        if 'log4j.properties' not in file_list:
            errors.append('missing file log4j.properties')
+        if 'upstart.conf' in file_list:
+            errors.append('Support for user supplied upstart.conf files has been deprecated, ' +
+                          'the deployment manager will supply one automatically. ' +
+                          'Please see PNDA example-applications for usage.')
        return errors
 
    def get_component_type(self):
@@ -85,9 +89,10 @@ def create_component(self, staged_component_path, application_name, user_name, c
            properties['component_py_files'] = ''
 
        if 'upstart.conf' in component['component_detail']:
-            # old style applications for backward compatibility
-            service_script = 'upstart.conf'
-            service_script_install_path = '/etc/init/%s.conf' % service_name
+            # old style applications - reject these
+            raise Exception('Support for user supplied upstart.conf files has been deprecated, ' +
+                            'the deployment manager will supply one automatically. ' +
+                            'Please see PNDA example-applications for usage.')
        else:
            # new style applications that don't need to provide upstart.conf or yarn-kill.py
            if 'component_main_jar' in properties and 'component_main_class' not in properties:

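From a package author's point of view, the effect of the validation change above can be sketched with a small stand-in for validate_component. The function and file listing below are illustrative only; the error text is the one added in the diff.

# Illustrative stand-in for the plugin's file-list validation; the file names
# passed in at the bottom are made up for the example.
def validate_file_list(file_list):
    errors = []
    if 'application.properties' not in file_list:
        errors.append('missing file application.properties')
    if 'log4j.properties' not in file_list:
        errors.append('missing file log4j.properties')
    if 'upstart.conf' in file_list:
        errors.append('Support for user supplied upstart.conf files has been deprecated, '
                      'the deployment manager will supply one automatically. '
                      'Please see PNDA example-applications for usage.')
    return errors

# An old-style package that still ships its own upstart.conf now fails
# validation (and create_component raises) instead of being installed.
print(validate_file_list(['application.properties', 'log4j.properties', 'upstart.conf']))
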
api/src/main/resources/requirements.txt

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 argparse==1.2.1
 backports-abc==0.5
 backports.ssl-match-hostname==3.5.0.1
-certifi==2016.9.26
+certifi==2017.4.17
 cffi==1.9.1
 cm-api==14.0.0
 cryptography==1.7.1
@@ -26,4 +26,4 @@ thrift==0.9.3
 thriftpy==0.3.9
 tornado==4.4.2
 tornado-cors==0.6.0
-wsgiref==0.1.2
+wsgiref==0.1.2
