diff --git a/scripts/python/generate_df_testrun_report.py b/scripts/python/generate_df_testrun_report.py
new file mode 100755
index 0000000000..5a05f501e5
--- /dev/null
+++ b/scripts/python/generate_df_testrun_report.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+"""
+Python script that parses the Android Test Orchestrator's instrumentation logs for a given
+Device Farm test run and generates a user-readable Junit report.
+"""
+import os
+import argparse
+import dload
+import boto3
+import sys
+import logging
+from instrumentation_parser import Parser
+from metrics import *
+from junit_xml import TestSuite, TestCase
+
+LOG_FORMATTER = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+CONSOLE_HANDLER = logging.StreamHandler()
+CONSOLE_HANDLER.setFormatter(LOG_FORMATTER)
+LOGGER = logging.getLogger("DeviceFarmTestRunReportGenerator")
+LOGGER.setLevel(os.getenv("LOG_LEVEL") if os.getenv("LOG_LEVEL") is not None else "INFO")
+LOGGER.addHandler(CONSOLE_HANDLER)
+
+# Parse the required script arguments
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Utility that generates a report for a DeviceFarm test run.")
+    parser.add_argument("-r", "--run_arn", help="The ARN of the DeviceFarm test run.", required=True)
+    parser.add_argument("-m", "--module_name", help="The module name for the test suite.", required=True)
+    parser.add_argument("-o", "--output_path", help="Destination path for the build reports.", required=True)
+    return parser.parse_args()
+
+def main(arguments):
+    LOGGER.info(f"Starting to generate report...")
+    args = parse_arguments()
+
+    # The path that the Device Farm artifacts will be unzipped into
+    logs_dir = "build/allTests/{}".format(args.module_name)
+
+    # The path of the Device Farm instrumentation log file
+    log_file = logs_dir + "/Host_Machine_Files/$DEVICEFARM_LOG_DIR/instrument.log"
+
+    df_client = boto3.client(
+        'devicefarm',
+        region_name='us-west-2'
+    )
+
+    # For a particular Device Farm run, grab the list of all of the artifacts
+    response = df_client.list_artifacts(
+        arn=args.run_arn,
+        type="FILE"
+    )
+
+    # The instrumentation logs are stored in the "CUSTOMER_ARTIFACT" file for a test job
+    customer_artifacts = (artifact for artifact in response["artifacts"] if artifact["type"] == "CUSTOMER_ARTIFACT")
+
+    # A single test run may have multiple jobs, where each job tests on a different device.
+    # A regular PR typically tests on one device while a release PR typically tests on 3 devices.
+    # The instrumentation logs for each job are uploaded as a separate CUSTOMER_ARTIFACT in the
+    # run's artifacts.
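+    # Illustrative note (shape assumed from the boto3 devicefarm list_artifacts response, not taken
+    # verbatim from this repo): each entry in response["artifacts"] looks roughly like
+    #   {"arn": "...", "name": "...", "type": "CUSTOMER_ARTIFACT", "extension": "zip", "url": "https://..."}
+    # so the generator above keeps one zipped customer artifact per device job.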
+    for job_no, customer_artifact in enumerate(customer_artifacts):
+        LOGGER.info(f"Parsing result for artifact ARN: {customer_artifact['arn']}")
+
+        unzip_result = dload.save_unzip(customer_artifact["url"], extract_path=logs_dir, delete_after=True)
+        if unzip_result is None or unzip_result == "":
+            LOGGER.error("Unzip of test run artifacts failed")
+            break
+
+        parser = Parser(args.module_name)
+        metrics = []
+        try:
+            # Open the provided file and then start to parse it
+            with open(log_file, "r") as file:
+                for line in file:
+                    try:
+                        parser.parse_line(line.strip())
+                    except Exception as e:
+                        exception_value, exception_location = get_exception(sys.exc_info())
+                        LOGGER.error(f"Encountered an exception trying to parse the results: {exception_value} at [{exception_location}] for line:\n{line.strip()}")
+
+            module_passing_tests = module_failing_tests = 0
+            LOGGER.info(f"\n--------------------------\nTest Suite Statistics\n--------------------------")
+            test_run_passing_tests = test_run_failing_tests = 0
+
+            # The Device Farm run ARN is in the format of:
+            # arn:aws:devicefarm:us-west-2:ACCOUNT_ID:run:PROJECT_ARN_ID/RUN_ARN_ID
+            # So split the run ARN by ':', take the last element, split it by '/', and then use each
+            # component to format a URL to add to the test report for easy access to the logs and output files.
+            arn_components = args.run_arn.split(":")[-1].split("/")
+            run_url = f"https://us-west-2.console.aws.amazon.com/devicefarm/home#/mobile/projects/{arn_components[0]}/runs/{arn_components[1]}/jobs/00000"
+            debug_messaging = f"You can find the detailed logs and output files at {run_url}"
+
+            # Run through the parser results and translate them into the junit_xml data classes
+            # while also constructing the CloudWatch metrics
+            for test_suite_name, test_suite in parser.test_run.test_suites.items():
+                test_cases = []
+                for test_name, test in test_suite.tests.items():
+                    if test.status_code == 0 or test.status_code == -3 or test.status_code == -4:
+                        test_status = "PASSED"
+                    elif test.status_code == -1:
+                        test_status = "ERROR"
+                    else:
+                        test_status = "FAILED"
+
+                    tc = TestCase(test_name,
+                                  classname=test_suite_name,
+                                  stdout=f"{debug_messaging}\n{test.stack_trace}",
+                                  status=test_status
+                                  )
+
+                    if test_status == "FAILED":
+                        tc.add_failure_info(test.stack_trace)
+                    elif test_status == "ERROR":
+                        tc.add_error_info(test.stack_trace)
+
+                    test_cases.append(tc)
+
+                # Because a test run can have any number of test jobs, we need to distinguish each job's
+                # test results, so we append the job index to the suite name.
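+                # For example (suite name is hypothetical): a suite named "BasicApiTest" from the second
+                # device job (job_no == 1) is written below as "BasicApiTest-1.xml" under args.output_path.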
+                ts = TestSuite(test_suite_name + "-" + str(job_no), test_cases=test_cases)
+                ts_output = TestSuite.to_xml_string([ts])
+                LOGGER.info(f"Saving test suite {test_suite_name} report.")
+
+                if not os.path.exists(args.output_path):
+                    os.makedirs(args.output_path)
+                f = open(args.output_path + test_suite_name + "-" + str(job_no) + ".xml", "w")
+                f.write(ts_output)
+                f.close()
+
+                success_percentage = test_suite.passing_tests/(test_suite.passing_tests + test_suite.failing_tests)
+                LOGGER.info(f"Name: {test_suite_name}")
+                LOGGER.info(f"Passing Tests: {test_suite.passing_tests}")
+                LOGGER.info(f"Failing Tests: {test_suite.failing_tests}")
+                LOGGER.info(f"Success Percentage: {success_percentage}")
+                LOGGER.info(f"------------------------------------------------")
+                test_run_passing_tests += test_suite.passing_tests
+                module_passing_tests += test_suite.passing_tests
+                test_run_failing_tests += test_suite.failing_tests
+                module_failing_tests += test_suite.failing_tests
+                if success_percentage < 1.0:
+                    parser.get_stack_traces(test_suite, metrics)
+
+                test_suite_dimension = [ get_dimension("Module", args.module_name), get_dimension("Test Suite", test_suite_name) ]
+
+                # Test Suite Success Percentage
+                metrics.append(get_metric("Test Success Percentage", test_suite_dimension, success_percentage, "Count"))
+
+                # Test Suite Success Count
+                metrics.append(get_metric("Tests Succeeded", test_suite_dimension, test_suite.passing_tests, "Count"))
+
+                # Test Suite Failure Count
+                metrics.append(get_metric("Tests Failed", test_suite_dimension, test_suite.failing_tests, "Count"))
+
+            LOGGER.info(f"\n--------------------------\nTest Run Statistics\n--------------------------")
+            LOGGER.info(f"Run Name: {args.module_name}")
+            LOGGER.info(f"Test Successes: {test_run_passing_tests}")
+            LOGGER.info(f"Test Failures: {test_run_failing_tests}")
+            success_percentage = test_run_passing_tests/(test_run_passing_tests + test_run_failing_tests)
+            LOGGER.info(f"Success Percentage: {success_percentage}")
+            LOGGER.info(f"Test Run Execution Time: {parser.execution_time}")
+
+            module_dimension = [ get_dimension("Module", args.module_name) ]
+            success_percentage = module_passing_tests/(module_passing_tests + module_failing_tests)
+            # Test Run Success Percentage
+            metrics.append(get_metric("Test Success Percentage", module_dimension, success_percentage, "Count"))
+            # Test Run Success Count
+            metrics.append(get_metric("Tests Succeeded", module_dimension, module_passing_tests, "Count"))
+            # Test Run Failure Count
+            metrics.append(get_metric("Tests Failed", module_dimension, module_failing_tests, "Count"))
+            # Test Run Execution Time
+            metrics.append(get_metric("Execution Time", module_dimension, float(parser.execution_time), "Seconds"))
+        except Exception as e:
+            exception_value, exception_location = get_exception(sys.exc_info())
+
+            LOGGER.error(f"Encountered an exception trying to parse the results: {exception_value} at [{exception_location}]")
+            exception_dimensions = [ get_dimension("Exception", str(exception_value)), get_dimension("Line Number", exception_location) ]
+            metrics.append(get_metric("Test Run Reporting Error", exception_dimensions, 1, "Count"))
+            print(f"Adding metric [{get_metric('Test Run Reporting Error', exception_dimensions, 1, 'Count')}]")
+
+        # Now that the logs have been parsed and metrics have been gathered, we publish the metrics
+        # to CloudWatch.
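+        # Each entry in `metrics` was built by get_metric() in metrics.py, so it has the shape
+        # (values here are illustrative only):
+        #   {"MetricName": "Tests Failed",
+        #    "Dimensions": [{"Name": "Module", "Value": "<module_name>"}],
+        #    "Value": 2, "Unit": "Count"}
+        # which matches what put_metric_data() expects for MetricData.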
+        try:
+            cw_client = boto3.client(
+                'cloudwatch',
+                region_name='us-east-1'
+            )
+
+            response = cw_client.put_metric_data(
+                Namespace='AmplifyAndroidV2-IntegTests',
+                MetricData=metrics
+            )
+            LOGGER.info(response)
+        except Exception as e:
+            exception_value, exception_location = get_exception(sys.exc_info())
+            LOGGER.error(f"Encountered an exception trying to publish the metrics: {exception_value} at [{exception_location}]")
+            LOGGER.error(f"The metrics that were attempted to be published: {metrics}")
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/python/instrumentation_parser.py b/scripts/python/instrumentation_parser.py
new file mode 100755
index 0000000000..6f7ccd0d8f
--- /dev/null
+++ b/scripts/python/instrumentation_parser.py
@@ -0,0 +1,165 @@
+"""
+Python script that parses the Android Test Orchestrator's instrumentation logs line by line
+and stores the results in memory.
+"""
+import re
+from dataclasses import dataclass
+from typing import Dict
+from metrics import *
+
+# See the Android InstrumentationResultParser for inspiration:
+# https://cs.android.com/android-studio/platform/tools/base/+/mirror-goog-studio-main:ddmlib/src/main/java/com/android/ddmlib/testrunner/InstrumentationResultParser.java;l=85?q=InstrumentationResultParser.java
+CLASS_PREFIX = "INSTRUMENTATION_STATUS: class="
+CURRENT_TEST_PREFIX = "INSTRUMENTATION_STATUS: current="
+NUM_TESTS_PREFIX = "INSTRUMENTATION_STATUS: numtests="
+STREAM_PREFIX = "INSTRUMENTATION_STATUS: stream="
+TEST_NAME_PREFIX = "INSTRUMENTATION_STATUS: test="
+STATUS_CODE = "INSTRUMENTATION_STATUS_CODE:"
+STACK_TRACE_PREFIX = "INSTRUMENTATION_STATUS: stack="
+
+# These prefixes will always show up, but we don't care about them for metrics purposes.
+# Instead, we use them to detect any instrumentation log lines that we don't recognize.
+ID_PREFIX = "INSTRUMENTATION_STATUS: id="
+RESULT_STREAM_PREFIX = "INSTRUMENTATION_RESULT: stream="
+CODE_PREFIX = "INSTRUMENTATION_CODE:"
+
+PACKAGE_NAMESPACE_PREFIX = "com.amplifyframework."
+TIME_PREFIX = "Time: "
+
+@dataclass
+class Test:
+    """The atomic test"""
+    # test_name
+    name: str
+    stack_trace: str = None
+
+    """
+    Instrumentation Status Code meanings:
+     1: Start
+     2: In Progress
+    -4: Assumption failed
+    -3: Ignored
+    -2: Failure
+    -1: Error
+     0: OK
+
+    https://cs.android.com/android-studio/platform/tools/base/+/mirror-goog-studio-main:ddmlib/src/main/java/com/android/ddmlib/testrunner/IInstrumentationResultParser.java;l=62?q=StatusKey
+    """
+    status_code: int = 1
+
+@dataclass
+class TestSuite:
+    """A suite that contains many tests (i.e. the class)"""
+    # class_name
+    name: str
+    # test_name: test
+    tests: Dict[str, Test]
+    passing_tests: int = 0
+    failing_tests: int = 0
+
+@dataclass
+class TestRun:
+    """A test run that contains many test suites (i.e. the module)"""
+    # module_name
+    name: str
+    # class_name: test_suite
+    test_suites: Dict[str, TestSuite]
+
+    def contains_suite(self, name):
+        return self.test_suites.get(name)
+
+class Parser:
+    def __init__(
+        self,
+        module_name
+    ):
+        self.module_name = module_name
+        self.stack_trace = ""
+        self.execution_time = 0
+        self.class_name = ""
+        self.test_run = None
+
+    def is_relevant_stacktrace(self, line):
+        return "error" in line.lower() or "exception" in line.lower() or PACKAGE_NAMESPACE_PREFIX in line.lower()
+
+    def get_stack_traces(self, test_suite, metrics):
+        pattern = r"@(\w{7})"
+        replacement = "@[JAVA_HASH_CODE]"
+
+        failure_status_codes = {-1, -2}
+        filtered_tests = {k: v for k, v in test_suite.tests.items() if v.status_code in failure_status_codes}
+        for test_name, test in filtered_tests.items():
+            sanitized_error = re.sub(pattern, replacement, ascii(test.stack_trace[0:500]))
+            stack_trace_dimensions = [
+                get_dimension("Module", self.module_name),
+                get_dimension("Test Suite", test_suite.name),
+                get_dimension("Test", test_name),
+                get_dimension("Exception", sanitized_error)
+            ]
+            metrics.append(get_metric("Test Failure", stack_trace_dimensions, 1.0, "Count"))
+
+    def parse_line(self, line):
+        line = line.strip()
+
+        # These values need to persist across calls to parse_line, so they are kept as module-level globals.
+        global test_num, num_tests, test_name, status_code
+        global test_suite, test, test_run_error, instrumentation_failure
+
+        if CLASS_PREFIX in line:
+            class_tokens = line.replace(CLASS_PREFIX + PACKAGE_NAMESPACE_PREFIX, "").strip().split(".")
+            # Class Name == Test Suite name
+            self.class_name = class_tokens.pop()
+
+            if self.test_run is None:
+                # Module doesn't exist yet, which means the test suite and test don't either
+                test_suite = TestSuite(name=self.class_name, tests={})
+                self.test_run = TestRun(name=self.module_name, test_suites={})
+            else:
+                if self.test_run.test_suites.get(self.class_name) is None:
+                    # Module exists but the Test Suite doesn't
+                    test_suite = TestSuite(name=self.class_name, tests={})
+                else:
+                    test_suite = self.test_run.test_suites.get(self.class_name)
+        elif CURRENT_TEST_PREFIX in line:
+            test_num = line.replace(CURRENT_TEST_PREFIX, "").strip()
+        elif NUM_TESTS_PREFIX in line:
+            num_tests = line.replace(NUM_TESTS_PREFIX, "").strip()
+        elif STREAM_PREFIX in line:
+            read_line = line.replace(STREAM_PREFIX, "").strip()
+            self.stack_trace = read_line
+        elif STACK_TRACE_PREFIX in line:
+            read_line = line.replace(STACK_TRACE_PREFIX, "").strip()
+            self.stack_trace = read_line
+        elif TEST_NAME_PREFIX in line:
+            test_name = line.replace(TEST_NAME_PREFIX, "").strip()
+            # First check if the test exists already
+            if test_suite.tests.get(test_name) is None:
+                # Initialize the new test
+                test = Test(name=test_name)
+                # Update it in the test suite
+                test_suite.tests[test_name] = test
+                self.test_run.test_suites[self.class_name] = test_suite
+        elif STATUS_CODE in line:
+            status_code = line.replace(STATUS_CODE, "").strip()
+            self.test_run.test_suites.get(self.class_name).tests.get(test_name).status_code = int(status_code)
+            if status_code == "0":
+                self.test_run.test_suites.get(self.class_name).passing_tests += 1
+            if status_code == "-2":
+                print(f"Test #{test_num}: [{self.module_name}] // [{self.class_name}#{test_name}] FAILED")
+                print(f"--- Stacktrace: [{self.stack_trace}]")
+                self.test_run.test_suites.get(self.class_name).tests.get(test_name).stack_trace = self.stack_trace
+                self.test_run.test_suites.get(self.class_name).failing_tests += 1
+            # The status code acts as a delimiter for a test case, so we can clear out the stack trace
+            self.stack_trace = ""
+        elif TIME_PREFIX in line:
+            self.execution_time = line.replace(TIME_PREFIX, "").strip().replace(',', '')
+            print(f"Setting time: {self.execution_time}")
+        elif "INSTRUMENTATION_" not in line:
+            # This line is likely a continuation of the ongoing stream, so append to it
+            if self.is_relevant_stacktrace(line):
+                if self.stack_trace.isspace() or self.stack_trace == "":
+                    self.stack_trace = line.replace("Error in ", "").strip()
+                else:
+                    self.stack_trace = self.stack_trace + " // " + line
+        elif ID_PREFIX not in line and RESULT_STREAM_PREFIX not in line and CODE_PREFIX not in line:
+            # If there is a line that we don't expect, print it out for debugging
+            print(f"Found a line that hasn't been parsed: {line}")
\ No newline at end of file
diff --git a/scripts/python/metrics.py b/scripts/python/metrics.py
new file mode 100755
index 0000000000..acd23088f5
--- /dev/null
+++ b/scripts/python/metrics.py
@@ -0,0 +1,30 @@
+import re
+
+def get_dimension(name, value):
+    return {
+        "Name": name,
+        "Value": value
+    }
+
+def get_metric(name, dimensions, value, unit):
+    return {
+        "MetricName": name,
+        "Dimensions": dimensions,
+        "Value": value,
+        "Unit": unit
+    }
+
+def get_exception(exc_info):
+    filename_regex = r"(\w+\.py)"
+    exception_location = ""
+    exc_type, exc_value, exc_traceback = exc_info
+    tb = exc_traceback
+    while tb is not None:
+        frame = tb.tb_frame
+        exception_location += re.findall(filename_regex, frame.f_code.co_filename)[0] + " @ " + \
+            frame.f_code.co_name + "#" + str(tb.tb_lineno)
+        tb = tb.tb_next
+        if tb is not None:
+            exception_location += " >> "
+
+    return exc_value, exception_location
\ No newline at end of file
diff --git a/scripts/run_test_in_devicefarm.sh b/scripts/run_test_in_devicefarm.sh
index 30ef43b63f..9023e6d702 100755
--- a/scripts/run_test_in_devicefarm.sh
+++ b/scripts/run_test_in_devicefarm.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 project_arn=$DEVICEFARM_PROJECT_ARN
 max_devices=$NUMBER_OF_DEVICES_TO_TEST
+test_spec_arn=$DEVICEFARM_TEST_SPEC_ARN
 module_name=$1
 file_name="$module_name-debug-androidTest.apk"
 full_path="$module_name/build/outputs/apk/androidTest/debug/$file_name"
@@ -111,19 +112,19 @@ stopDuplicates
 # Schedule the test run in device farm
 echo "Scheduling test run"
 run_arn=`aws devicefarm schedule-run --project-arn=$project_arn \
-    --app-arn="$app_package_upload_arn" \
-    --device-selection-configuration='{
-        "filters": [
-            {"attribute": "ARN", "operator":"IN", "values":["'$minDevice'", "'$middleDevice'", "'$latestDevice'"]}
-        ],
-        "maxDevices": '$max_devices'
-    }' \
-    --name="$file_name-$CODEBUILD_SOURCE_VERSION" \
-    --test="type=INSTRUMENTATION,testPackageArn=$test_package_upload_arn" \
-    --execution-configuration="jobTimeoutMinutes=30,videoCapture=false" \
-    --query="run.arn" \
-    --output=text \
-    --region="us-west-2"`
+    --app-arn="$app_package_upload_arn" \
+    --device-selection-configuration='{
+        "filters": [
+            {"attribute": "ARN", "operator":"IN", "values":["'$minDevice'", "'$middleDevice'", "'$latestDevice'"]}
+        ],
+        "maxDevices": '$max_devices'
+    }' \
+    --name="$file_name-$CODEBUILD_SOURCE_VERSION" \
+    --test="testSpecArn=$test_spec_arn,type=INSTRUMENTATION,testPackageArn=$test_package_upload_arn" \
+    --execution-configuration="jobTimeoutMinutes=30,videoCapture=false" \
+    --query="run.arn" \
+    --output=text \
+    --region="us-west-2"`
 
 status='NONE'
 result='NONE'
@@ -141,7 +142,11 @@ while true; do
 done
 echo "Status = $status Result = $result"
 
-./scripts/generate_df_testrun_report --run_arn="$run_arn" --module_name="$module_name" --pr="$CODEBUILD_SOURCE_VERSION" --output_path="build/allTests/$module_name/"
+./scripts/python/generate_df_testrun_report.py \
+    -r "$run_arn" \
+    -m "$module_name" \
+    -o "build/allTests/$module_name/"
+
 # If the result is PASSED, then exit with a return code 0
 if [ "$result" = "PASSED" ]
 then