diff --git a/CIME/Tools/wait_for_tests b/CIME/Tools/wait_for_tests index c166061c99b..484b393b6f4 100755 --- a/CIME/Tools/wait_for_tests +++ b/CIME/Tools/wait_for_tests @@ -87,7 +87,7 @@ OR ) parser.add_argument( - "--force-log-upload", + "--cdash-force-log-upload", action="store_true", help="Always upload logs to cdash, even if test passed", ) @@ -105,6 +105,11 @@ OR help="The name of the CDash project where results should be uploaded", ) + parser.add_argument( + "--cdash-tmproot", + help="Where to put temporary files needed to do cdash submission. Default=/tmp", + ) + parser.add_argument( "-g", "--cdash-build-group", @@ -132,9 +137,10 @@ OR args.ignore_memleak, args.cdash_build_name, args.cdash_project, + args.cdash_tmproot, args.cdash_build_group, args.timeout, - args.force_log_upload, + args.cdash_force_log_upload, args.no_run, args.update_success, ) @@ -153,6 +159,7 @@ def _main_func(description): ignore_memleak, cdash_build_name, cdash_project, + cdash_tmproot, cdash_build_group, timeout, force_log_upload, @@ -172,6 +179,7 @@ def _main_func(description): ignore_memleak=ignore_memleak, cdash_build_name=cdash_build_name, cdash_project=cdash_project, + cdash_tmproot=cdash_tmproot, cdash_build_group=cdash_build_group, timeout=timeout, force_log_upload=force_log_upload, diff --git a/CIME/tests/test_sys_wait_for_tests.py b/CIME/tests/test_sys_wait_for_tests.py index 0377d65771e..3484c9af7a5 100644 --- a/CIME/tests/test_sys_wait_for_tests.py +++ b/CIME/tests/test_sys_wait_for_tests.py @@ -9,6 +9,7 @@ from CIME import utils from CIME import test_status +from CIME.wait_for_tests import ENV_VAR_KEEP_CDASH from CIME.tests import base from CIME.tests import utils as test_utils @@ -110,6 +111,8 @@ def tearDown(self): for testdir in self._testdirs: shutil.rmtree(testdir) + os.environ.pop(ENV_VAR_KEEP_CDASH, None) + def simple_test(self, testdir, expected_results, extra_args="", build_name=None): # Need these flags to test dashboard if e3sm if self._config.create_test_flag_mode == "e3sm" and build_name is not None: @@ -270,6 +273,7 @@ def test_wait_for_test_cdash_pass(self): def test_wait_for_test_cdash_kill(self): expected_results = ["PEND" if item == 5 else "PASS" for item in range(10)] + os.environ[ENV_VAR_KEEP_CDASH] = "True" build_name = "regression_test_kill_" + self._timestamp run_thread = threading.Thread( target=self.threaded_test, diff --git a/CIME/wait_for_tests.py b/CIME/wait_for_tests.py index 928b010ba30..1f6940138a9 100644 --- a/CIME/wait_for_tests.py +++ b/CIME/wait_for_tests.py @@ -1,13 +1,13 @@ # pylint: disable=import-error -import queue -import os, time, threading, socket, signal, shutil, glob +import queue, os, time, threading, socket, signal, shutil, glob, tempfile +from pathlib import Path # pylint: disable=import-error import logging import xml.etree.ElementTree as xmlet import CIME.utils -from CIME.utils import expect, Timeout, run_cmd_no_fail, safe_copy, CIMEError +from CIME.utils import expect, Timeout, run_cmd, run_cmd_no_fail, safe_copy, CIMEError from CIME.XML.machines import Machines from CIME.test_status import * from CIME.provenance import save_test_success @@ -17,6 +17,7 @@ E3SM_MAIN_CDASH = "E3SM" CDASH_DEFAULT_BUILD_GROUP = "ACME_Latest" SLEEP_INTERVAL_SEC = 0.1 +ENV_VAR_KEEP_CDASH = "CIME_TEST_CDASH_WFT" ############################################################################### def signal_handler(*_): @@ -92,24 +93,16 @@ def create_cdash_xml_boiler( utc_time, current_time, hostname, - git_commit, ): ############################################################################### site_elem = xmlet.Element("Site") - if "JENKINS_START_TIME" in os.environ: - time_info_str = "Total testing time: {:d} seconds".format( - int(current_time) - int(os.environ["JENKINS_START_TIME"]) - ) - else: - time_info_str = "" - site_elem.attrib["BuildName"] = cdash_build_name site_elem.attrib["BuildStamp"] = "{}-{}".format(utc_time, cdash_build_group) site_elem.attrib["Name"] = hostname site_elem.attrib["OSName"] = "Linux" site_elem.attrib["Hostname"] = hostname - site_elem.attrib["OSVersion"] = "Commit: {}{}".format(git_commit, time_info_str) + site_elem.attrib["OSVersion"] = "Unknown" phase_elem = xmlet.SubElement(site_elem, phase) @@ -130,7 +123,6 @@ def create_cdash_config_xml( current_time, hostname, data_rel_path, - git_commit, ): ############################################################################### site_elem, config_elem = create_cdash_xml_boiler( @@ -140,7 +132,6 @@ def create_cdash_config_xml( utc_time, current_time, hostname, - git_commit, ) xmlet.SubElement(config_elem, "ConfigureCommand").text = "namelists" @@ -165,7 +156,7 @@ def create_cdash_config_xml( xmlet.SubElement(config_elem, "ElapsedMinutes").text = "0" # Skip for now etree = xmlet.ElementTree(site_elem) - etree.write(os.path.join(data_rel_path, "Configure.xml")) + etree.write(data_rel_path / "Configure.xml") ############################################################################### @@ -177,7 +168,6 @@ def create_cdash_build_xml( current_time, hostname, data_rel_path, - git_commit, ): ############################################################################### site_elem, build_elem = create_cdash_xml_boiler( @@ -187,7 +177,6 @@ def create_cdash_build_xml( utc_time, current_time, hostname, - git_commit, ) xmlet.SubElement(build_elem, "ConfigureCommand").text = "case.build" @@ -214,7 +203,7 @@ def create_cdash_build_xml( xmlet.SubElement(build_elem, "ElapsedMinutes").text = "0" # Skip for now etree = xmlet.ElementTree(site_elem) - etree.write(os.path.join(data_rel_path, "Build.xml")) + etree.write(data_rel_path / "Build.xml") ############################################################################### @@ -226,7 +215,6 @@ def create_cdash_test_xml( current_time, hostname, data_rel_path, - git_commit, ): ############################################################################### site_elem, testing_elem = create_cdash_xml_boiler( @@ -236,7 +224,6 @@ def create_cdash_test_xml( utc_time, current_time, hostname, - git_commit, ) test_list_elem = xmlet.SubElement(testing_elem, "TestList") @@ -298,28 +285,20 @@ def create_cdash_test_xml( xmlet.SubElement(testing_elem, "ElapsedMinutes").text = "0" # Skip for now etree = xmlet.ElementTree(site_elem) - - etree.write(os.path.join(data_rel_path, "Test.xml")) + etree.write(data_rel_path / "Test.xml") ############################################################################### def create_cdash_xml_fakes( - results, cdash_build_name, cdash_build_group, utc_time, current_time, hostname + results, + cdash_build_name, + cdash_build_group, + utc_time, + current_time, + hostname, + data_rel_path, ): ############################################################################### - # We assume all cases were created from the same code repo - first_result_case = os.path.dirname(list(results.items())[0][1][0]) - try: - srcroot = run_cmd_no_fail( - "./xmlquery --value SRCROOT", from_dir=first_result_case - ) - except CIMEError: - # Use repo containing this script as last resort - srcroot = os.path.join(CIME.utils.get_cime_root(), "..") - - git_commit = CIME.utils.get_current_commit(repo=srcroot) - - data_rel_path = os.path.join("Testing", utc_time) create_cdash_config_xml( results, @@ -329,7 +308,6 @@ def create_cdash_xml_fakes( current_time, hostname, data_rel_path, - git_commit, ) create_cdash_build_xml( @@ -340,7 +318,6 @@ def create_cdash_xml_fakes( current_time, hostname, data_rel_path, - git_commit, ) create_cdash_test_xml( @@ -351,88 +328,86 @@ def create_cdash_xml_fakes( current_time, hostname, data_rel_path, - git_commit, ) ############################################################################### def create_cdash_upload_xml( - results, cdash_build_name, cdash_build_group, utc_time, hostname, force_log_upload + results, + cdash_build_name, + cdash_build_group, + utc_time, + hostname, + force_log_upload, + tmp_path, + data_rel_path, ): ############################################################################### - data_rel_path = os.path.join("Testing", utc_time) - - try: - log_dir = "{}_logs".format(cdash_build_name) - - need_to_upload = False + log_dirname = f"{cdash_build_name}_logs" + log_path = tmp_path / log_dirname + + need_to_upload = False + + for test_name, test_data in results.items(): + test_path, test_status, _ = test_data + + if test_status != TEST_PASS_STATUS or force_log_upload: + test_case_dir = os.path.dirname(test_path) + + case_dirs = [test_case_dir] + case_base = os.path.basename(test_case_dir) + test_case2_dir = os.path.join(test_case_dir, "case2", case_base) + if os.path.exists(test_case2_dir): + case_dirs.append(test_case2_dir) + + for case_dir in case_dirs: + for param in ["EXEROOT", "RUNDIR", "CASEDIR"]: + if param == "CASEDIR": + log_src_dir = case_dir + else: + # it's possible that tests that failed very badly/early, and fake cases for testing + # will not be able to support xmlquery + try: + log_src_dir = run_cmd_no_fail( + "./xmlquery {} --value".format(param), + from_dir=case_dir, + ) + except CIMEError: + continue + + log_dst_dir = log_path / "{}{}_{}_logs".format( + test_name, + "" if case_dir == test_case_dir else ".case2", + param, + ) + log_dst_dir.mkdir(parents=True) + for log_file in glob.glob(os.path.join(log_src_dir, "*log*")): + if os.path.isdir(log_file): + shutil.copytree( + log_file, log_dst_dir / os.path.basename(log_file) + ) + else: + safe_copy(log_file, str(log_dst_dir)) + for log_file in glob.glob( + os.path.join(log_src_dir, "*.cprnc.out*") + ): + safe_copy(log_file, str(log_dst_dir)) - for test_name, test_data in results.items(): - test_path, test_status, _ = test_data + need_to_upload = True - if test_status != TEST_PASS_STATUS or force_log_upload: - test_case_dir = os.path.dirname(test_path) + if need_to_upload: - case_dirs = [test_case_dir] - case_base = os.path.basename(test_case_dir) - test_case2_dir = os.path.join(test_case_dir, "case2", case_base) - if os.path.exists(test_case2_dir): - case_dirs.append(test_case2_dir) + tarball = "{}.tar.gz".format(log_dirname) - for case_dir in case_dirs: - for param in ["EXEROOT", "RUNDIR", "CASEDIR"]: - if param == "CASEDIR": - log_src_dir = case_dir - else: - # it's possible that tests that failed very badly/early, and fake cases for testing - # will not be able to support xmlquery - try: - log_src_dir = run_cmd_no_fail( - "./xmlquery {} --value".format(param), - from_dir=case_dir, - ) - except: - continue - - log_dst_dir = os.path.join( - log_dir, - "{}{}_{}_logs".format( - test_name, - "" if case_dir == test_case_dir else ".case2", - param, - ), - ) - os.makedirs(log_dst_dir) - for log_file in glob.glob(os.path.join(log_src_dir, "*log*")): - if os.path.isdir(log_file): - shutil.copytree( - log_file, - os.path.join( - log_dst_dir, os.path.basename(log_file) - ), - ) - else: - safe_copy(log_file, log_dst_dir) - for log_file in glob.glob( - os.path.join(log_src_dir, "*.cprnc.out*") - ): - safe_copy(log_file, log_dst_dir) - - need_to_upload = True - - if need_to_upload: - - tarball = "{}.tar.gz".format(log_dir) - if os.path.exists(tarball): - os.remove(tarball) - - run_cmd_no_fail( - "tar -cf - {} | gzip -c".format(log_dir), arg_stdout=tarball - ) - base64 = run_cmd_no_fail("base64 {}".format(tarball)) + run_cmd_no_fail( + "tar -cf - {} | gzip -c".format(log_dirname), + arg_stdout=tarball, + from_dir=str(tmp_path), + ) + base64 = run_cmd_no_fail("base64 {}".format(tarball), from_dir=str(tmp_path)) - xml_text = r""" + xml_text = r""" "?> @@ -444,25 +419,26 @@ def create_cdash_upload_xml( """.format( - cdash_build_name, - utc_time, - cdash_build_group, - hostname, - os.path.abspath(tarball), - base64, - ) - - with open(os.path.join(data_rel_path, "Upload.xml"), "w") as fd: - fd.write(xml_text) + cdash_build_name, + utc_time, + cdash_build_group, + hostname, + str((tmp_path / tarball).absolute()), + base64, + ) - finally: - if os.path.isdir(log_dir): - shutil.rmtree(log_dir) + with (data_rel_path / "Upload.xml").open(mode="w") as fd: + fd.write(xml_text) ############################################################################### def create_cdash_xml( - results, cdash_build_name, cdash_project, cdash_build_group, force_log_upload=False + results, + cdash_build_name, + cdash_project, + cdash_build_group, + force_log_upload=False, + cdash_tmproot=None, ): ############################################################################### @@ -482,8 +458,78 @@ def create_cdash_xml( "Could not convert hostname '{}' into an E3SM machine name".format(hostname) ) - for drop_method in ["https", "http"]: - dart_config = """ + # We assume all cases were created from the same code repo + first_result_case = os.path.dirname(list(results.items())[0][1][0]) + try: + srcroot = run_cmd_no_fail( + "./xmlquery --value SRCROOT", from_dir=first_result_case + ) + except CIMEError: + # Use repo containing this script as last resort + srcroot = os.path.join(CIME.utils.get_cime_root(), "..") + + git_commit = CIME.utils.get_current_commit(repo=srcroot) + + # Get total elapsed time + if "JENKINS_START_TIME" in os.environ: + time_info = int(current_time) - int(os.environ["JENKINS_START_TIME"]) + else: + time_info = "unknown" + + if cdash_tmproot: + tmproots = [cdash_tmproot] + else: + tmproots = [None, first_result_case, os.getcwd()] + + # Try multiple tmproots if necessary. The default /tmp will be tried first + # unless cdash_tmproot was provided. The location of the default can be + # modified via the TMPDIR environment variable. + for tmproot in tmproots: + try: + with tempfile.TemporaryDirectory(dir=tmproot) as tmpdir: + tmp_path = Path(tmpdir) + utc_time = time.strftime("%Y%m%d-%H%M", utc_time_tuple) + dart_path = tmp_path / "DartConfiguration.tcl" + testing_path = tmp_path / "Testing" + testtime_dir = testing_path / utc_time # Most action happens here + tag_file = testing_path / "TAG" + notes_file = tmp_path / "notes.txt" + + testtime_dir.mkdir(parents=True) + + # Make tag file + with tag_file.open(mode="w") as tag_fd: + tag_fd.write(f"{utc_time}\n{cdash_build_group}\n") + + # Make notes file + with notes_file.open(mode="w") as notes_fd: + notes_fd.write( + f"Commit {git_commit}\nTotal testing time {time_info} seconds\n" + ) + + create_cdash_xml_fakes( + results, + cdash_build_name, + cdash_build_group, + utc_time, + current_time, + hostname, + testtime_dir, + ) + + create_cdash_upload_xml( + results, + cdash_build_name, + cdash_build_group, + utc_time, + hostname, + force_log_upload, + tmp_path, + testtime_dir, + ) + + for drop_method in ["https", "http"]: + dart_config = """ SourceDirectory: {0} BuildDirectory: {0} @@ -512,55 +558,42 @@ def create_cdash_xml( UseLaunchers: CurlOptions: CURLOPT_SSL_VERIFYPEER_OFF;CURLOPT_SSL_VERIFYHOST_OFF """.format( - os.getcwd(), - hostname, - cdash_build_name, - cdash_project, - shutil.which("scp"), - cdash_timestamp, - drop_method, - ) - - with open("DartConfiguration.tcl", "w") as dart_fd: - dart_fd.write(dart_config) - - utc_time = time.strftime("%Y%m%d-%H%M", utc_time_tuple) - testing_dir = os.path.join("Testing", utc_time) - if os.path.isdir(testing_dir): - shutil.rmtree(testing_dir) - - os.makedirs(os.path.join("Testing", utc_time)) - - # Make tag file - with open("Testing/TAG", "w") as tag_fd: - tag_fd.write("{}\n{}\n".format(utc_time, cdash_build_group)) + str(tmp_path.absolute()), + hostname, + cdash_build_name, + cdash_project, + shutil.which("scp"), + cdash_timestamp, + drop_method, + ) + with dart_path.open(mode="w") as dart_fd: + dart_fd.write(dart_config) - create_cdash_xml_fakes( - results, - cdash_build_name, - cdash_build_group, - utc_time, - current_time, - hostname, - ) + stat, out, _ = run_cmd( + "ctest -VV -D NightlySubmit -A notes.txt", + combine_output=True, + from_dir=str(tmp_path), + ) + if stat != 0: + logging.warning( + "ctest upload drop method {} FAILED:\n{}".format( + drop_method, out + ) + ) + else: + logging.info("Upload SUCCESS:\n{}".format(out)) + if ENV_VAR_KEEP_CDASH in os.environ: + logging.info( + f"Test mode enabled, copying {str(tmp_path)} to {os.getcwd()}" + ) + safe_copy(str(tmp_path / "Testing"), os.getcwd()) - create_cdash_upload_xml( - results, - cdash_build_name, - cdash_build_group, - utc_time, - hostname, - force_log_upload, - ) + return - stat, out, _ = run_cmd("ctest -VV -D NightlySubmit", combine_output=True) - if stat != 0: + except Exception as e: logging.warning( - "ctest upload drop method {} FAILED:\n{}".format(drop_method, out) + f"Using temp root '{tmproot}', cdash submission failed with error {e}" ) - else: - logging.info("Upload SUCCESS:\n{}".format(out)) - return expect(False, "All cdash upload attempts failed") @@ -727,6 +760,7 @@ def wait_for_tests( ignore_memleak=False, cdash_build_name=None, cdash_project=E3SM_MAIN_CDASH, + cdash_tmproot=None, cdash_build_group=CDASH_DEFAULT_BUILD_GROUP, timeout=None, force_log_upload=False, @@ -833,6 +867,7 @@ def wait_for_tests( cdash_project, cdash_build_group, force_log_upload, + cdash_tmproot, ) return all_pass