diff --git a/scripts/python/generate_df_testrun_report.py b/scripts/python/generate_df_testrun_report.py
new file mode 100755
index 0000000000..5a05f501e5
--- /dev/null
+++ b/scripts/python/generate_df_testrun_report.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+"""
+Python script that parses the Android Test Orchestrator's instrumentation logs for a given
+Device Farm test run and generates a user-readable Junit report.
+"""
+import os
+import argparse
+import dload
+import boto3
+import sys
+import logging
+from instrumentation_parser import Parser
+from metrics import *
+from junit_xml import TestSuite, TestCase
+
+LOG_FORMATTER = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+CONSOLE_HANDLER = logging.StreamHandler()
+CONSOLE_HANDLER.setFormatter(LOG_FORMATTER)
+LOGGER = logging.getLogger("DeviceFarmTestRunReportGenerator")
+LOGGER.setLevel(os.getenv("LOG_LEVEL") if os.getenv("LOG_LEVEL") is not None else "INFO")
+LOGGER.addHandler(CONSOLE_HANDLER)
+
+# Parse the required script arguments
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Utility that generates a report for a DeviceFarm test run.")
+    parser.add_argument("-r", "--run_arn", help="The ARN of the DeviceFarm test run.", required=True)
+    parser.add_argument("-m", "--module_name", help="The module name for the test suite.", required=True)
+    parser.add_argument("-o", "--output_path", help="Destination path for the build reports.", required=True)
+    return parser.parse_args()
+
+def main(arguments):
+    LOGGER.info(f"Starting to generate report...")
+    args = parse_arguments()
+
+    # The path that the Device Farm artifacts will be unzipped into
+    logs_dir = "build/allTests/{}".format(args.module_name)
+
+    # The path of the Device Farm instrumentation log file
+    log_file = logs_dir + "/Host_Machine_Files/$DEVICEFARM_LOG_DIR/instrument.log"
+
+    df_client = boto3.client(
+        'devicefarm',
+        region_name='us-west-2'
+    )
+
+    # For a particular Device Farm run, grab the list of all of the artifacts
+    response = df_client.list_artifacts(
+        arn=args.run_arn,
+        type="FILE"
+    )
+
+    # The instrumentation logs are stored in the "CUSTOMER_ARTIFACT" file for a test job
+    customer_artifacts = (artifact for artifact in response["artifacts"] if artifact["type"] == "CUSTOMER_ARTIFACT")
+
+    # A single test run may have multiple jobs, where each job tests on a different device.
+    # A regular PR typically tests on one device while a release PR typically tests on 3 devices.
+    # The instrumentation logs for each job are uploaded as a separate CUSTOMER_ARTIFACT in the
+    # run's artifacts.
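+    # Illustrative note (shape assumed from the boto3 devicefarm list_artifacts response, not taken
+    # verbatim from this repo): each entry in response["artifacts"] looks roughly like
+    #   {"arn": "...", "name": "...", "type": "CUSTOMER_ARTIFACT", "extension": "zip", "url": "https://..."}
+    # so the generator above keeps one zipped customer artifact per device job.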
+    for job_no, customer_artifact in enumerate(customer_artifacts):
+        LOGGER.info(f"Parsing result for artifact ARN: {customer_artifact['arn']}")
+
+        unzip_result = dload.save_unzip(customer_artifact["url"], extract_path=logs_dir, delete_after=True)
+        if unzip_result is None or unzip_result == "":
+            LOGGER.error("Unzip of test run artifacts failed")
+            break
+
+        parser = Parser(args.module_name)
+        metrics = []
+        try:
+            # Open the provided file and then start to parse it
+            with open(log_file, "r") as file:
+                for line in file:
+                    try:
+                        parser.parse_line(line.strip())
+                    except Exception as e:
+                        exception_value, exception_location = get_exception(sys.exc_info())
+                        LOGGER.error(f"Encountered an exception trying to parse the results: {exception_value} at [{exception_location}] for line:\n{line.strip()}")
+
+            module_passing_tests = module_failing_tests = 0
+            LOGGER.info(f"\n--------------------------\nTest Suite Statistics\n--------------------------")
+            test_run_passing_tests = test_run_failing_tests = 0
+
+            # The Device Farm run ARN is in the format of:
+            # arn:aws:devicefarm:us-west-2:ACCOUNT_ID:run:PROJECT_ARN_ID/RUN_ARN_ID
+            # So split the run ARN by ':', take the last element, split it by '/', and then use each
+            # component to format a URL to add to the test report for easy access to the logs and output files.
+            arn_components = args.run_arn.split(":")[-1].split("/")
+            run_url = f"https://us-west-2.console.aws.amazon.com/devicefarm/home#/mobile/projects/{arn_components[0]}/runs/{arn_components[1]}/jobs/00000"
+            debug_messaging = f"You can find the detailed logs and output files at {run_url}"
+
+            # Run through the parser results and translate them into the junit_xml data classes
+            # while also constructing the CloudWatch metrics
+            for test_suite_name, test_suite in parser.test_run.test_suites.items():
+                test_cases = []
+                for test_name, test in test_suite.tests.items():
+                    if test.status_code == 0 or test.status_code == -3 or test.status_code == -4:
+                        test_status = "PASSED"
+                    elif test.status_code == -1:
+                        test_status = "ERROR"
+                    else:
+                        test_status = "FAILED"
+
+                    tc = TestCase(test_name,
+                                  classname=test_suite_name,
+                                  stdout=f"{debug_messaging}\n{test.stack_trace}",
+                                  status=test_status
+                                  )
+
+                    if test_status == "FAILED":
+                        tc.add_failure_info(test.stack_trace)
+                    elif test_status == "ERROR":
+                        tc.add_error_info(test.stack_trace)
+
+                    test_cases.append(tc)
+
+                # Because a test run can have any number of test jobs, we need to distinguish each job's
+                # test results, so we append the job index to the suite name.
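+                # For example (suite name is hypothetical): a suite named "BasicApiTest" from the second
+                # device job (job_no == 1) is written below as "BasicApiTest-1.xml" under args.output_path.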
+                ts = TestSuite(test_suite_name + "-" + str(job_no), test_cases=test_cases)
+                ts_output = TestSuite.to_xml_string([ts])
+                LOGGER.info(f"Saving test suite {test_suite_name} report.")
+
+                if not os.path.exists(args.output_path):
+                    os.makedirs(args.output_path)
+                f = open(args.output_path + test_suite_name + "-" + str(job_no) + ".xml", "w")
+                f.write(ts_output)
+                f.close()
+
+                success_percentage = test_suite.passing_tests/(test_suite.passing_tests + test_suite.failing_tests)
+                LOGGER.info(f"Name: {test_suite_name}")
+                LOGGER.info(f"Passing Tests: {test_suite.passing_tests}")
+                LOGGER.info(f"Failing Tests: {test_suite.failing_tests}")
+                LOGGER.info(f"Success Percentage: {success_percentage}")
+                LOGGER.info(f"------------------------------------------------")
+                test_run_passing_tests += test_suite.passing_tests
+                module_passing_tests += test_suite.passing_tests
+                test_run_failing_tests += test_suite.failing_tests
+                module_failing_tests += test_suite.failing_tests
+                if success_percentage < 1.0:
+                    parser.get_stack_traces(test_suite, metrics)
+
+                test_suite_dimension = [ get_dimension("Module", args.module_name), get_dimension("Test Suite", test_suite_name) ]
+
+                # Test Suite Success Percentage
+                metrics.append(get_metric("Test Success Percentage", test_suite_dimension, success_percentage, "Count"))
+
+                # Test Suite Success Count
+                metrics.append(get_metric("Tests Succeeded", test_suite_dimension, test_suite.passing_tests, "Count"))
+
+                # Test Suite Failure Count
+                metrics.append(get_metric("Tests Failed", test_suite_dimension, test_suite.failing_tests, "Count"))
+
+            LOGGER.info(f"\n--------------------------\nTest Run Statistics\n--------------------------")
+            LOGGER.info(f"Run Name: {args.module_name}")
+            LOGGER.info(f"Test Successes: {test_run_passing_tests}")
+            LOGGER.info(f"Test Failures: {test_run_failing_tests}")
+            success_percentage = test_run_passing_tests/(test_run_passing_tests + test_run_failing_tests)
+            LOGGER.info(f"Success Percentage: {success_percentage}")
+            LOGGER.info(f"Test Run Execution Time: {parser.execution_time}")
+
+            module_dimension = [ get_dimension("Module", args.module_name) ]
+            success_percentage = module_passing_tests/(module_passing_tests + module_failing_tests)
+            # Test Run Success Percentage
+            metrics.append(get_metric("Test Success Percentage", module_dimension, success_percentage, "Count"))
+            # Test Run Success Count
+            metrics.append(get_metric("Tests Succeeded", module_dimension, module_passing_tests, "Count"))
+            # Test Run Failure Count
+            metrics.append(get_metric("Tests Failed", module_dimension, module_failing_tests, "Count"))
+            # Test Run Execution Time
+            metrics.append(get_metric("Execution Time", module_dimension, float(parser.execution_time), "Seconds"))
+        except Exception as e:
+            exception_value, exception_location = get_exception(sys.exc_info())
+
+            LOGGER.error(f"Encountered an exception trying to parse the results: {exception_value} at [{exception_location}]")
+            exception_dimensions = [ get_dimension("Exception", str(exception_value)), get_dimension("Line Number", exception_location) ]
+            metrics.append(get_metric("Test Run Reporting Error", exception_dimensions, 1, "Count"))
+            print(f"Adding metric [{get_metric('Test Run Reporting Error', exception_dimensions, 1, 'Count')}]")
+
+        # Now that the logs have been parsed and metrics have been gathered, we publish the metrics
+        # to CloudWatch.
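+        # Each entry in `metrics` was built by get_metric() in metrics.py, so it has the shape
+        # (values here are illustrative only):
+        #   {"MetricName": "Tests Failed",
+        #    "Dimensions": [{"Name": "Module", "Value": "<module_name>"}],
+        #    "Value": 2, "Unit": "Count"}
+        # which matches what put_metric_data() expects for MetricData.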
+        try:
+            cw_client = boto3.client(
+                'cloudwatch',
+                region_name='us-east-1'
+            )
+
+            response = cw_client.put_metric_data(
+                Namespace='AmplifyAndroidV2-IntegTests',
+                MetricData=metrics
+            )
+            LOGGER.info(response)
+        except Exception as e:
+            exception_value, exception_location = get_exception(sys.exc_info())
+            LOGGER.error(f"Encountered an exception trying to publish the metrics: {exception_value} at [{exception_location}]")
+            LOGGER.error(f"The metrics that were attempted to be published: {metrics}")
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/scripts/python/instrumentation_parser.py b/scripts/python/instrumentation_parser.py
new file mode 100755
index 0000000000..6f7ccd0d8f
--- /dev/null
+++ b/scripts/python/instrumentation_parser.py
@@ -0,0 +1,165 @@
+"""
+Python script that parses the Android Test Orchestrator's instrumentation logs line by line
+and stores the results in memory.
+"""
+import re
+from dataclasses import dataclass
+from typing import Dict
+from metrics import *
+
+# See the Android InstrumentationResultParser for inspiration:
+# https://cs.android.com/android-studio/platform/tools/base/+/mirror-goog-studio-main:ddmlib/src/main/java/com/android/ddmlib/testrunner/InstrumentationResultParser.java;l=85?q=InstrumentationResultParser.java
+CLASS_PREFIX = "INSTRUMENTATION_STATUS: class="
+CURRENT_TEST_PREFIX = "INSTRUMENTATION_STATUS: current="
+NUM_TESTS_PREFIX = "INSTRUMENTATION_STATUS: numtests="
+STREAM_PREFIX = "INSTRUMENTATION_STATUS: stream="
+TEST_NAME_PREFIX = "INSTRUMENTATION_STATUS: test="
+STATUS_CODE = "INSTRUMENTATION_STATUS_CODE:"
+STACK_TRACE_PREFIX = "INSTRUMENTATION_STATUS: stack="
+
+# These prefixes will always show up, but we don't care about them for metrics purposes.
+# Instead, we use them to detect any instrumentation log lines that we don't recognize.
+ID_PREFIX = "INSTRUMENTATION_STATUS: id="
+RESULT_STREAM_PREFIX = "INSTRUMENTATION_RESULT: stream="
+CODE_PREFIX = "INSTRUMENTATION_CODE:"
+
+PACKAGE_NAMESPACE_PREFIX = "com.amplifyframework."
+TIME_PREFIX = "Time: "
+
+@dataclass
+class Test:
+    """The atomic test"""
+    # test_name
+    name: str
+    stack_trace: str = None
+
+    """
+    Instrumentation Status Code meanings:
+     1: Start
+     2: In Progress
+    -4: Assumption failed
+    -3: Ignored
+    -2: Failure
+    -1: Error
+     0: OK
+
+    https://cs.android.com/android-studio/platform/tools/base/+/mirror-goog-studio-main:ddmlib/src/main/java/com/android/ddmlib/testrunner/IInstrumentationResultParser.java;l=62?q=StatusKey
+    """
+    status_code: int = 1
+
+@dataclass
+class TestSuite:
+    """A suite that contains many tests (i.e. the class)"""
+    # class_name
+    name: str
+    # test_name: test
+    tests: Dict[str, Test]
+    passing_tests: int = 0
+    failing_tests: int = 0
+
+@dataclass
+class TestRun:
+    """A test run that contains many test suites (i.e. the module)"""
+    # module_name
+    name: str
+    # class_name: test_suite
+    test_suites: Dict[str, TestSuite]
+
+    def contains_suite(self, name):
+        return self.test_suites.get(name)
+
+class Parser:
+    def __init__(
+        self,
+        module_name
+    ):
+        self.module_name = module_name
+        self.stack_trace = ""
+        self.execution_time = 0
+        self.class_name = ""
+        self.test_run = None
+
+    def is_relevant_stacktrace(self, line):
+        return "error" in line.lower() or "exception" in line.lower() or PACKAGE_NAMESPACE_PREFIX in line.lower()
+
+    def get_stack_traces(self, test_suite, metrics):
+        pattern = r"@(\w{7})"
+        replacement = "@[JAVA_HASH_CODE]"
+
+        failure_status_codes = {-1, -2}
+        filtered_tests = {k: v for k, v in test_suite.tests.items() if v.status_code in failure_status_codes}
+        for test_name, test in filtered_tests.items():
+            sanitized_error = re.sub(pattern, replacement, ascii(test.stack_trace[0:500]))
+            stack_trace_dimensions = [
+                get_dimension("Module", self.module_name),
+                get_dimension("Test Suite", test_suite.name),
+                get_dimension("Test", test_name),
+                get_dimension("Exception", sanitized_error)
+            ]
+            metrics.append(get_metric("Test Failure", stack_trace_dimensions, 1.0, "Count"))
+
+    def parse_line(self, line):
+        line = line.strip()
+
+        # These values need to persist across calls to parse_line, so they are kept as module-level globals.
+        global test_num, num_tests, test_name, status_code
+        global test_suite, test, test_run_error, instrumentation_failure
+
+        if CLASS_PREFIX in line:
+            class_tokens = line.replace(CLASS_PREFIX + PACKAGE_NAMESPACE_PREFIX, "").strip().split(".")
+            # Class Name == Test Suite name
+            self.class_name = class_tokens.pop()
+
+            if self.test_run is None:
+                # Module doesn't exist yet, which means the test suite and test don't either
+                test_suite = TestSuite(name=self.class_name, tests={})
+                self.test_run = TestRun(name=self.module_name, test_suites={})
+            else:
+                if self.test_run.test_suites.get(self.class_name) is None:
+                    # Module exists but the Test Suite doesn't
+                    test_suite = TestSuite(name=self.class_name, tests={})
+                else:
+                    test_suite = self.test_run.test_suites.get(self.class_name)
+        elif CURRENT_TEST_PREFIX in line:
+            test_num = line.replace(CURRENT_TEST_PREFIX, "").strip()
+        elif NUM_TESTS_PREFIX in line:
+            num_tests = line.replace(NUM_TESTS_PREFIX, "").strip()
+        elif STREAM_PREFIX in line:
+            read_line = line.replace(STREAM_PREFIX, "").strip()
+            self.stack_trace = read_line
+        elif STACK_TRACE_PREFIX in line:
+            read_line = line.replace(STACK_TRACE_PREFIX, "").strip()
+            self.stack_trace = read_line
+        elif TEST_NAME_PREFIX in line:
+            test_name = line.replace(TEST_NAME_PREFIX, "").strip()
+            # First check if the test exists already
+            if test_suite.tests.get(test_name) is None:
+                # Initialize the new test
+                test = Test(name=test_name)
+                # Update it in the test suite
+                test_suite.tests[test_name] = test
+                self.test_run.test_suites[self.class_name] = test_suite
+        elif STATUS_CODE in line:
+            status_code = line.replace(STATUS_CODE, "").strip()
+            self.test_run.test_suites.get(self.class_name).tests.get(test_name).status_code = int(status_code)
+            if status_code == "0":
+                self.test_run.test_suites.get(self.class_name).passing_tests += 1
+            if status_code == "-2":
+                print(f"Test #{test_num}: [{self.module_name}] // [{self.class_name}#{test_name}] FAILED")
+                print(f"--- Stacktrace: [{self.stack_trace}]")
+                self.test_run.test_suites.get(self.class_name).tests.get(test_name).stack_trace = self.stack_trace
+                self.test_run.test_suites.get(self.class_name).failing_tests += 1
+            # The status code acts as a delimiter for a test case, so we can clear out the stack trace
+            self.stack_trace = ""
+        elif TIME_PREFIX in line:
+            self.execution_time = line.replace(TIME_PREFIX, "").strip().replace(',', '')
+            print(f"Setting time: {self.execution_time}")
+        elif "INSTRUMENTATION_" not in line:
+            # This line is likely a continuation of the ongoing stream, so append to it
+            if self.is_relevant_stacktrace(line):
+                if self.stack_trace.isspace() or self.stack_trace == "":
+                    self.stack_trace = line.replace("Error in ", "").strip()
+                else:
+                    self.stack_trace = self.stack_trace + " // " + line
+        elif ID_PREFIX not in line and RESULT_STREAM_PREFIX not in line and CODE_PREFIX not in line:
+            # If there is a line that we don't expect, print it out for debugging
+            print(f"Found a line that hasn't been parsed: {line}")
\ No newline at end of file
diff --git a/scripts/python/metrics.py b/scripts/python/metrics.py
new file mode 100755
index 0000000000..acd23088f5
--- /dev/null
+++ b/scripts/python/metrics.py
@@ -0,0 +1,30 @@
+import re
+
+def get_dimension(name, value):
+    return {
+        "Name": name,
+        "Value": value
+    }
+
+def get_metric(name, dimensions, value, unit):
+    return {
+        "MetricName": name,
+        "Dimensions": dimensions,
+        "Value": value,
+        "Unit": unit
+    }
+
+def get_exception(exc_info):
+    filename_regex = r"(\w+\.py)"
+    exception_location = ""
+    exc_type, exc_value, exc_traceback = exc_info
+    tb = exc_traceback
+    while tb is not None:
+        frame = tb.tb_frame
+        exception_location += re.findall(filename_regex, frame.f_code.co_filename)[0] + " @ " + \
+            frame.f_code.co_name + "#" + str(tb.tb_lineno)
+        tb = tb.tb_next
+        if tb is not None:
+            exception_location += " >> "
+
+    return exc_value, exception_location
\ No newline at end of file
diff --git a/scripts/run_test_in_devicefarm.sh b/scripts/run_test_in_devicefarm.sh
index 30ef43b63f..9023e6d702 100755
--- a/scripts/run_test_in_devicefarm.sh
+++ b/scripts/run_test_in_devicefarm.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 project_arn=$DEVICEFARM_PROJECT_ARN
 max_devices=$NUMBER_OF_DEVICES_TO_TEST
+test_spec_arn=$DEVICEFARM_TEST_SPEC_ARN
 module_name=$1
 file_name="$module_name-debug-androidTest.apk"
 full_path="$module_name/build/outputs/apk/androidTest/debug/$file_name"
@@ -111,19 +112,19 @@ stopDuplicates
 # Schedule the test run in device farm
 echo "Scheduling test run"
 run_arn=`aws devicefarm schedule-run --project-arn=$project_arn \
-    --app-arn="$app_package_upload_arn" \
-    --device-selection-configuration='{
-        "filters": [
-            {"attribute": "ARN", "operator":"IN", "values":["'$minDevice'", "'$middleDevice'", "'$latestDevice'"]}
-        ],
-        "maxDevices": '$max_devices'
-    }' \
-    --name="$file_name-$CODEBUILD_SOURCE_VERSION" \
-    --test="type=INSTRUMENTATION,testPackageArn=$test_package_upload_arn" \
-    --execution-configuration="jobTimeoutMinutes=30,videoCapture=false" \
-    --query="run.arn" \
-    --output=text \
-    --region="us-west-2"`
+    --app-arn="$app_package_upload_arn" \
+    --device-selection-configuration='{
+        "filters": [
+            {"attribute": "ARN", "operator":"IN", "values":["'$minDevice'", "'$middleDevice'", "'$latestDevice'"]}
+        ],
+        "maxDevices": '$max_devices'
+    }' \
+    --name="$file_name-$CODEBUILD_SOURCE_VERSION" \
+    --test="testSpecArn=$test_spec_arn,type=INSTRUMENTATION,testPackageArn=$test_package_upload_arn" \
+    --execution-configuration="jobTimeoutMinutes=30,videoCapture=false" \
+    --query="run.arn" \
+    --output=text \
+    --region="us-west-2"`
 
 status='NONE'
 result='NONE'
@@ -141,7 +142,11 @@ while true; do
 done
 echo "Status = $status Result = $result"
 
-./scripts/generate_df_testrun_report --run_arn="$run_arn" --module_name="$module_name" --pr="$CODEBUILD_SOURCE_VERSION" --output_path="build/allTests/$module_name/"
+./scripts/python/generate_df_testrun_report.py \
+    -r "$run_arn" \
+    -m "$module_name" \
+    -o "build/allTests/$module_name/"
+
 # If the result is PASSED, then exit with a return code 0
 if [ "$result" = "PASSED" ]
 then