|
25 | 25 | import re
|
26 | 26 | import sys
|
27 | 27 | import subprocess
|
| 28 | +import glob |
| 29 | +import shutil |
28 | 30 | from collections import namedtuple
|
29 | 31 |
|
30 | 32 | from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES
|
@@ -400,15 +402,66 @@ def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
|
400 | 402 | run_scala_tests_sbt(test_modules, test_profiles)
|
401 | 403 |
|
402 | 404 |
|
403 |
def run_python_tests(test_modules, parallelism, with_coverage=False):
    """Run the PySpark test suites for the given modules.

    Parameters
    ----------
    test_modules : list
        Modules whose Python test goals should run; ``[modules.root]`` means all.
    parallelism : int
        Number of parallel test workers (overridden to 4 under coverage).
    with_coverage : bool
        When True, run via the coverage wrapper script and publish the
        resulting HTML report afterwards.
    """
    set_title_and_block("Running PySpark tests", "BLOCK_PYSPARK_UNIT_TESTS")

    if with_coverage:
        # Coverage makes the PySpark tests flaky due to heavy parallelism,
        # so cap the worker count at 4 as a workaround for now.
        parallelism = 4
    script = "run-tests-with-coverage" if with_coverage else "run-tests"
    command = [os.path.join(SPARK_HOME, "python", script)]
    if test_modules != [modules.root]:
        command.append("--modules=%s" % ','.join(m.name for m in test_modules))
    command.append("--parallelism=%i" % parallelism)
    run_cmd(command)

    if with_coverage:
        post_python_tests_results()
| 424 | + |
| 425 | + |
def post_python_tests_results():
    """Publish the generated PySpark coverage HTMLs to the
    'github.com/spark-test/pyspark-coverage-site' repository.

    Requires the 'SPARK_TEST_KEY' environment variable for authentication;
    exits the process with an error when it is absent.
    """
    spark_test_key = os.environ.get("SPARK_TEST_KEY")
    if spark_test_key is None:
        print("[error] 'SPARK_TEST_KEY' environment variable was not set. Unable to post "
              "PySpark coverage results.")
        sys.exit(1)
    # 1. Clone the coverage site, authenticating with the test key.
    # NOTE(review): the token is embedded in the clone URL, so it could leak if
    # run_cmd echoes the command line — confirm run_cmd's logging behavior.
    clone_url = (
        "https://spark-test:%[email protected]/spark-test/pyspark-coverage-site.git"
        % spark_test_key)
    run_cmd(["git", "clone", clone_url])
    # 2. Drop the previously published HTMLs.
    run_cmd(["rm", "-fr"] + glob.glob("pyspark-coverage-site/*"))
    # 3. Copy the freshly generated coverage HTMLs into the checkout.
    for html_file in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME):
        shutil.copy(html_file, "pyspark-coverage-site/")
    os.chdir("pyspark-coverage-site")
    try:
        # 4. Point HEAD at a temporary branch so the commit starts fresh.
        run_cmd(["git", "symbolic-ref", "HEAD", "refs/heads/latest_branch"])
        # 5./6. Stage everything and commit the current HTMLs.
        run_cmd(["git", "add", "-A"])
        run_cmd([
            "git",
            "commit",
            "-am",
            "Coverage report at latest commit in Apache Spark",
            '--author="Apache Spark Test Account <[email protected]>"'])
        # 7./8. Replace the old 'gh-pages' branch with the temporary one.
        run_cmd(["git", "branch", "-D", "gh-pages"])
        run_cmd(["git", "branch", "-m", "gh-pages"])
        # 9. Force-push the rebuilt branch to the remote site.
        run_cmd(["git", "push", "-f", "origin", "gh-pages"])
    finally:
        # Always restore the working directory, even if a git step fails.
        os.chdir("..")
412 | 465 |
|
413 | 466 | def run_python_packaging_tests():
|
414 | 467 | set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS")
|
@@ -567,7 +620,11 @@ def main():
|
567 | 620 |
|
568 | 621 | modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
|
569 | 622 | if modules_with_python_tests:
|
570 |
| - run_python_tests(modules_with_python_tests, opts.parallelism) |
| 623 | + # We only run PySpark tests with coverage report in one specific job with |
| 624 | + # Spark master with SBT in Jenkins. |
| 625 | + is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ |
| 626 | + run_python_tests( |
| 627 | + modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) |
571 | 628 | run_python_packaging_tests()
|
572 | 629 | if any(m.should_run_r_tests for m in test_modules):
|
573 | 630 | run_sparkr_tests()
|
|
0 commit comments