Skip to content

Commit 2528b38

Browse files
committed
[LIVY-1024] Upgrade Livy to Python3
## What changes were proposed in this pull request?

* Python 2 was deprecated a while back, and most systems/environments no longer support it.
* Bump Livy to Python 3 and remove Python 2 support, since Spark 2 has now also been deprecated.
* Closes LIVY-1024.

## How was this patch tested?

* CI, plus manual local testing of the Python 3 files.
1 parent f8ec2bc commit 2528b38

File tree

14 files changed

+71
-190
lines changed

14 files changed

+71
-190
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,20 +29,20 @@ To build Livy, you will need:
2929
Debian/Ubuntu:
3030
* mvn (from ``maven`` package or maven3 tarball)
3131
* openjdk-8-jdk (or Oracle JDK 8)
32-
* Python 2.7+
32+
* Python 3.x+
3333
* R 3.x
3434

3535
Redhat/CentOS:
3636
* mvn (from ``maven`` package or maven3 tarball)
3737
* java-1.8.0-openjdk (or Oracle JDK 8)
38-
* Python 2.7+
38+
* Python 3.x+
3939
* R 3.x
4040

4141
MacOS:
4242
* Xcode command line tools
4343
* Oracle's JDK 1.8
4444
* Maven (Homebrew)
45-
* Python 2.7+
45+
* Python 3.x+
4646
* R 3.x
4747

4848
Required python packages for building Livy:

dev/docker/livy-dev-base/Dockerfile

Lines changed: 3 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -70,32 +70,11 @@ RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
7070
ENV PYENV_ROOT=$HOME/pyenv
7171
ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"
7272

73-
RUN pyenv install -v 2.7.18 && \
74-
pyenv install -v 3.9.21 && \
75-
pyenv global 2.7.18 3.9.21 && \
73+
RUN pyenv install -v 3.9.21 && \
74+
pyenv global 3.9.21 && \
7675
pyenv rehash
7776

78-
# Add build dependencies for python2
79-
# - First we upgrade pip because that makes a lot of things better
80-
# - Then we remove the provided version of setuptools and install a different version
81-
# - Then we install additional dependencies
82-
RUN python2 -m pip install -U "pip < 21.0" && \
83-
apt-get remove -y python-setuptools && \
84-
python2 -m pip install "setuptools < 36" && \
85-
python2 -m pip install \
86-
cloudpickle \
87-
codecov \
88-
flake8 \
89-
flaky \
90-
"future>=0.15.2" \
91-
"futures>=3.0.5" \
92-
pytest \
93-
pytest-runner \
94-
requests-kerberos \
95-
"requests >= 2.10.0" \
96-
"responses >= 0.5.1"
97-
98-
# Now do the same for python3
77+
# Install build dependencies for python3
9978
RUN python3 -m pip install -U pip && pip3 install \
10079
cloudpickle \
10180
codecov \
@@ -112,4 +91,3 @@ RUN pyenv rehash
11291
RUN apt remove -y openjdk-11-jre-headless
11392

11493
WORKDIR /workspace
115-

dev/merge_livy_pr.py

Lines changed: 17 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -33,21 +33,15 @@
3333
# usage: ./merge_livy_pr.py (see config env vars below)
3434
#
3535

36-
3736
import json
3837
import os
3938
import re
4039
import subprocess
4140
import sys
41+
import urllib.request
42+
from urllib.error import HTTPError
4243

43-
if sys.version_info[0] < 3:
44-
import urllib2
45-
from urllib2 import HTTPError
46-
input_prompt_fn = raw_input
47-
else:
48-
import urllib.request as urllib2
49-
from urllib.error import HTTPError
50-
input_prompt_fn = input
44+
input_prompt_fn = input
5145

5246
try:
5347
import jira.client
@@ -71,21 +65,19 @@
7165
# https://github.com/settings/tokens. This script only requires the "public_repo" scope.
7266
GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY")
7367

74-
7568
GITHUB_BASE = "https://github.com/apache/incubator-livy/pull"
7669
GITHUB_API_BASE = "https://api.github.com/repos/apache/incubator-livy"
7770
JIRA_BASE = "https://issues.apache.org/jira/browse"
7871
JIRA_API_BASE = "https://issues.apache.org/jira"
7972
# Prefix added to temporary branches
8073
BRANCH_PREFIX = "PR_TOOL"
8174

82-
8375
def get_json(url):
8476
try:
85-
request = urllib2.Request(url)
77+
request = urllib.request.Request(url)
8678
if GITHUB_OAUTH_KEY:
8779
request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
88-
return json.load(urllib2.urlopen(request))
80+
return json.load(urllib.request.urlopen(request))
8981
except HTTPError as e:
9082
if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
9183
print("Exceeded the GitHub API rate limit; see the instructions in " +
@@ -95,42 +87,34 @@ def get_json(url):
9587
print("Unable to fetch URL, exiting: %s" % url)
9688
sys.exit(-1)
9789

98-
9990
def fail(msg):
10091
print(msg)
10192
clean_up()
10293
sys.exit(-1)
10394

104-
10595
def run_cmd(cmd):
10696
print(cmd)
10797
if isinstance(cmd, list):
10898
out_bytes = subprocess.check_output(cmd)
10999
else:
110100
out_bytes = subprocess.check_output(cmd.split(" "))
111-
if sys.version_info[0] > 2:
112-
return out_bytes.decode()
113-
else:
114-
return out_bytes
115-
101+
return out_bytes.decode()
116102

117103
def continue_maybe(prompt):
118104
result = input_prompt_fn("\n%s (y/n): " % prompt)
119105
if result.lower() != "y":
120106
fail("Okay, exiting")
121107

122-
123108
def clean_up():
124109
print("Restoring head pointer to %s" % original_head)
125110
run_cmd("git checkout %s" % original_head)
126111

127112
branches = run_cmd("git branch").replace(" ", "").split("\n")
128113

129-
for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
114+
for branch in [x for x in branches if x.startswith(BRANCH_PREFIX)]:
130115
print("Deleting local branch %s" % branch)
131116
run_cmd("git branch -D %s" % branch)
132117

133-
134118
# merge the requested PR and return the merge hash
135119
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
136120
pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
@@ -201,7 +185,6 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
201185
print("Merge hash: %s" % merge_hash)
202186
return merge_hash
203187

204-
205188
def cherry_pick(pr_num, merge_hash, default_branch):
206189
pick_ref = input_prompt_fn("Enter a branch name [%s]: " % default_branch)
207190
if pick_ref == "":
@@ -236,15 +219,13 @@ def cherry_pick(pr_num, merge_hash, default_branch):
236219
print("Pick hash: %s" % pick_hash)
237220
return pick_ref
238221

239-
240222
def fix_version_from_branch(branch, versions):
241223
# Note: Assumes this is a sorted (newest->oldest) list of un-released versions
242224
if branch == "master":
243225
return versions[0]
244226
else:
245227
branch_ver = branch.replace("branch-", "")
246-
return filter(lambda x: x.name.startswith(branch_ver), versions)[-1]
247-
228+
return [x for x in versions if x.name.startswith(branch_ver)][-1]
248229

249230
def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
250231
asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
@@ -275,11 +256,11 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
275256

276257
versions = asf_jira.project_versions("LIVY")
277258
versions = sorted(versions, key=lambda x: x.name, reverse=True)
278-
versions = filter(lambda x: x.raw['released'] is False, versions)
259+
versions = [x for x in versions if x.raw['released'] is False]
279260
# Consider only x.y.z versions
280-
versions = filter(lambda x: re.match('\d+\.\d+\.\d+', x.name), versions)
261+
versions = [x for x in versions if re.match(r'\d+\.\d+\.\d+', x.name)]
281262

282-
default_fix_versions = map(lambda x: fix_version_from_branch(x, versions).name, merge_branches)
263+
default_fix_versions = [fix_version_from_branch(x, versions).name for x in merge_branches]
283264
for v in default_fix_versions:
284265
# Handles the case where we have forked a release branch but not yet made the release.
285266
# In this case, if the PR is committed to the master branch and the release branch, we
@@ -289,7 +270,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
289270
if patch == "0":
290271
previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
291272
if previous in default_fix_versions:
292-
default_fix_versions = filter(lambda x: x != v, default_fix_versions)
273+
default_fix_versions = [x for x in default_fix_versions if x != v]
293274
default_fix_versions = ",".join(default_fix_versions)
294275

295276
fix_versions = input_prompt_fn(
@@ -299,19 +280,18 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
299280
fix_versions = fix_versions.replace(" ", "").split(",")
300281

301282
def get_version_json(version_str):
302-
return filter(lambda v: v.name == version_str, versions)[0].raw
283+
return [v for v in versions if v.name == version_str][0].raw
303284

304-
jira_fix_versions = map(lambda v: get_version_json(v), fix_versions)
285+
jira_fix_versions = [get_version_json(v) for v in fix_versions]
305286

306-
resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
307-
resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
287+
resolve = [a for a in asf_jira.transitions(jira_id) if a['name'] == "Resolve Issue"][0]
288+
resolution = [r for r in asf_jira.resolutions() if r.raw['name'] == "Fixed"][0]
308289
asf_jira.transition_issue(
309290
jira_id, resolve["id"], fixVersions=jira_fix_versions,
310291
comment=comment, resolution={'id': resolution.raw['id']})
311292

312293
print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
313294

314-
315295
def resolve_jira_issues(title, merge_branches, comment):
316296
jira_ids = re.findall("LIVY-[0-9]{3,6}", title)
317297

@@ -320,7 +300,6 @@ def resolve_jira_issues(title, merge_branches, comment):
320300
for jira_id in jira_ids:
321301
resolve_jira_issue(merge_branches, comment, jira_id)
322302

323-
324303
def standardize_jira_ref(text):
325304
"""
326305
Standardize the [LIVY-XXXXX] [MODULE] prefix
@@ -362,7 +341,6 @@ def standardize_jira_ref(text):
362341

363342
return clean_text
364343

365-
366344
def get_current_ref():
367345
ref = run_cmd("git rev-parse --abbrev-ref HEAD").strip()
368346
if ref == 'HEAD':
@@ -371,15 +349,14 @@ def get_current_ref():
371349
else:
372350
return ref
373351

374-
375352
def main():
376353
global original_head
377354

378355
os.chdir(LIVY_HOME)
379356
original_head = get_current_ref()
380357

381358
branches = get_json("%s/branches" % GITHUB_API_BASE)
382-
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
359+
branch_names = [x for x in [x['name'] for x in branches] if x.startswith("branch-")]
383360
# Assumes branch names can be sorted lexicographically
384361
latest_branch = sorted(branch_names, reverse=True)[0]
385362

@@ -462,7 +439,6 @@ def main():
462439
print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
463440
print("Exiting without trying to close the associated JIRA.")
464441

465-
466442
if __name__ == "__main__":
467443
import doctest
468444
(failure_count, test_count) = doctest.testmod()

examples/src/main/python/pi_app.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@
1515
# limitations under the License.
1616
#
1717

18-
from __future__ import print_function
19-
2018
import sys
2119
from random import random
2220
from operator import add
@@ -48,7 +46,7 @@ def f(_):
4846
return 1 if x ** 2 + y ** 2 <= 1 else 0
4947

5048
def pi_job(context):
51-
count = context.sc.parallelize(range(1, samples + 1), slices).map(f).reduce(add)
49+
count = context.sc.parallelize(list(range(1, samples + 1)), slices).map(f).reduce(add)
5250
return 4.0 * count / samples
5351

5452
pi = client.submit(pi_job).result()

integration-test/src/test/resources/batch.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,6 @@
2222
output = sys.argv[1]
2323
sc = SparkContext(appName="PySpark Test")
2424
try:
25-
sc.parallelize(range(100), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
25+
sc.parallelize(list(range(100)), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
2626
finally:
2727
sc.stop()

0 commit comments

Comments
 (0)