diff --git a/AUTHORS b/AUTHORS
index a089ca678f7..75bf8090cf6 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -395,6 +395,7 @@ Roland Puntaier
 Romain Dorgueil
 Roman Bolshakov
 Ronny Pfannschmidt
+Roni Kishner
 Ross Lawley
 Ruaridh Williamson
 Russel Winder
diff --git a/changelog/13986.bugfix.rst b/changelog/13986.bugfix.rst
new file mode 100644
index 00000000000..0bf08bed4f2
--- /dev/null
+++ b/changelog/13986.bugfix.rst
@@ -0,0 +1 @@
+Fixed double-counting of subtest failures in the final test summary. Subtest failures are now reported separately as "subtests failed" instead of being counted as regular "failed" tests, providing clearer statistics. For example, a test with 3 subtests where 1 fails and 2 pass now shows "1 failed, 1 subtests failed, 2 subtests passed" instead of "2 failed, 2 subtests passed".
diff --git a/src/_pytest/pytester.py b/src/_pytest/pytester.py
index 1cd5f05dd7e..6694c87fa43 100644
--- a/src/_pytest/pytester.py
+++ b/src/_pytest/pytester.py
@@ -510,8 +510,9 @@ def _config_for_test() -> Generator[Config]:
 # Regex to match the session duration string in the summary: "74.34s".
 rex_session_duration = re.compile(r"\d+\.\d\ds")
 
-# Regex to match all the counts and phrases in the summary line: "34 passed, 111 skipped".
-rex_outcome = re.compile(r"(\d+) (\w+)")
+# Regex to match all the counts and phrases in the summary line:
+# "34 passed, 111 skipped, 3 subtests passed, 1 subtests failed".
+rex_outcome = re.compile(r"(\d+) ([\w\s]+?)(?=,| in|$)")
 
 
 @final
@@ -578,7 +579,7 @@ def parse_summary_nouns(cls, lines) -> dict[str, int]:
         for line in reversed(lines):
             if rex_session_duration.search(line):
                 outcomes = rex_outcome.findall(line)
-                ret = {noun: int(count) for (count, noun) in outcomes}
+                ret = {noun.strip(): int(count) for (count, noun) in outcomes}
                 break
         else:
             raise ValueError("Pytest terminal summary report not found")
@@ -586,6 +587,9 @@ def parse_summary_nouns(cls, lines) -> dict[str, int]:
         to_plural = {
             "warning": "warnings",
             "error": "errors",
+            "subtest failed": "subtests failed",
+            "subtest passed": "subtests passed",
+            "subtest skipped": "subtests skipped",
         }
         return {to_plural.get(k, k): v for k, v in ret.items()}
 
diff --git a/src/_pytest/subtests.py b/src/_pytest/subtests.py
index e0ceb27f4b1..89f9070971e 100644
--- a/src/_pytest/subtests.py
+++ b/src/_pytest/subtests.py
@@ -387,7 +387,7 @@ def pytest_report_teststatus(
             return category, short, f"{status}{description}"
 
     if report.failed:
-        return outcome, "u", f"SUBFAILED{description}"
+        return "subtests failed", "u", f"SUBFAILED{description}"
     else:
         if report.passed:
             if quiet:
diff --git a/src/_pytest/terminal.py b/src/_pytest/terminal.py
index 4517b05bdee..361a63055e8 100644
--- a/src/_pytest/terminal.py
+++ b/src/_pytest/terminal.py
@@ -1387,7 +1387,7 @@ def _get_main_color(self) -> tuple[str, list[str]]:
 
     def _determine_main_color(self, unknown_type_seen: bool) -> str:
         stats = self.stats
-        if "failed" in stats or "error" in stats:
+        if "failed" in stats or "error" in stats or "subtests failed" in stats:
             main_color = "red"
         elif "warnings" in stats or "xpassed" in stats or unknown_type_seen:
             main_color = "yellow"
diff --git a/testing/test_subtests.py b/testing/test_subtests.py
index 6849df53622..c6b313633ef 100644
--- a/testing/test_subtests.py
+++ b/testing/test_subtests.py
@@ -55,7 +55,7 @@ def test_zaz(subtests):
         [
             "test_*.py uFuF. * [[]100%[]]",
             *summary_lines,
-            "* 4 failed, 1 passed in *",
+            "* 2 failed, 1 passed, 2 subtests failed in *",
         ]
     )
 
@@ -69,7 +69,7 @@ def test_zaz(subtests):
             "test_*.py::test_zaz SUBPASSED[[]zaz subtest[]] * [[]100%[]]",
             "test_*.py::test_zaz PASSED * [[]100%[]]",
             *summary_lines,
-            "* 4 failed, 1 passed, 1 subtests passed in *",
+            "* 2 failed, 1 passed, 1 subtests passed, 2 subtests failed in *",
         ]
     )
     pytester.makeini(
         """
@@ -87,7 +87,7 @@ def test_zaz(subtests):
             "test_*.py::test_bar FAILED * [[] 66%[]]",
             "test_*.py::test_zaz PASSED * [[]100%[]]",
             *summary_lines,
-            "* 4 failed, 1 passed in *",
+            "* 2 failed, 2 subtests failed, 1 passed in *",
         ]
     )
     result.stdout.no_fnmatch_line("test_*.py::test_zaz SUBPASSED[[]zaz subtest[]]*")
@@ -307,7 +307,7 @@ def test_foo(subtests, x):
             "*.py::test_foo[[]1[]] SUBFAILED[[]custom[]] (i=1) *[[]100%[]]",
             "*.py::test_foo[[]1[]] FAILED *[[]100%[]]",
             "contains 1 failed subtest",
-            "* 4 failed, 4 subtests passed in *",
+            "* 2 failed, 4 subtests passed, 2 subtests failed in *",
         ]
     )
 
@@ -325,7 +325,7 @@ def test_foo(subtests, x):
             "*.py::test_foo[[]1[]] SUBFAILED[[]custom[]] (i=1) *[[]100%[]]",
             "*.py::test_foo[[]1[]] FAILED *[[]100%[]]",
             "contains 1 failed subtest",
-            "* 4 failed in *",
+            "* 2 failed, 2 subtests failed in *",
         ]
     )
 
@@ -344,7 +344,7 @@ def test_foo(subtests):
     result = pytester.runpytest("-v")
     result.stdout.fnmatch_lines(
         [
-            "* 2 failed, 2 subtests passed in *",
+            "* 1 failed, 2 subtests passed, 1 subtests failed in *",
         ]
     )
 
@@ -365,7 +365,7 @@ def test_foo(subtests):
     result.stdout.fnmatch_lines(
         [
             "*AssertionError: top-level failure",
-            "* 2 failed, 2 subtests passed in *",
+            "* 1 failed, 2 subtests passed, 1 subtests failed in *",
         ]
     )
 
@@ -386,7 +386,7 @@ def test_foo(subtests):
     result = pytester.runpytest("-v")
     result.stdout.fnmatch_lines(
         [
-            "* 2 failed, 2 subtests passed in *",
+            "* 1 failed, 2 subtests passed, 1 subtests failed in *",
         ]
     )
 
@@ -427,7 +427,7 @@ def test_zaz(self):
     result = pytester.runpytest()
     result.stdout.fnmatch_lines(
         [
-            "* 3 failed, 2 passed in *",
+            "* 1 failed, 2 passed, 1 subtests passed, 2 subtests failed in *",
         ]
     )
 
@@ -814,7 +814,7 @@ def test(subtests):
     result = pytester.runpytest("-p no:logging")
     result.stdout.fnmatch_lines(
         [
-            "*2 failed in*",
+            "*1 failed, 1 subtests failed in*",
         ]
     )
     result.stdout.no_fnmatch_line("*root:test_no_logging.py*log line*")
@@ -899,12 +899,15 @@ def test_foo(subtests):
         """
     )
     result = pytester.runpytest("--exitfirst")
-    assert result.parseoutcomes()["failed"] == 2
+    outcomes = result.parseoutcomes()
+    assert outcomes["failed"] == 1
+    assert outcomes["subtests failed"] == 1
     result.stdout.fnmatch_lines(
         [
-            "SUBFAILED*[[]sub1[]] *.py::test_foo - assert False*",
-            "FAILED *.py::test_foo - assert False",
-            "* stopping after 2 failures*",
+            "*=== short test summary info ===*",
+            "*FAILED*test_foo*",
+            "*stopping after 2 failures*",
+            "*1 failed, 1 subtests failed*",
         ],
         consecutive=True,
     )
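
For reference, a minimal sketch of how the updated rex_outcome pattern from the src/_pytest/pytester.py hunk parses a summary line that mixes regular and subtest outcomes. The snippet is illustrative only (not part of the patch); the example line and the expected result in the comment assume the regex behaves exactly as written above.

import re

# Pattern introduced in the pytester.py hunk: captures a count plus a possibly
# multi-word noun ("subtests failed"), stopping at ",", " in", or end of line.
rex_outcome = re.compile(r"(\d+) ([\w\s]+?)(?=,| in|$)")

line = "1 failed, 2 subtests passed, 1 subtests failed in 0.12s"
counts = {noun.strip(): int(count) for count, noun in rex_outcome.findall(line)}
print(counts)  # expected: {'failed': 1, 'subtests passed': 2, 'subtests failed': 1}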