diff --git a/smart_tests/commands/subset.py b/smart_tests/commands/subset.py
index 83f57457c..ff1d3587b 100644
--- a/smart_tests/commands/subset.py
+++ b/smart_tests/commands/subset.py
@@ -400,7 +400,7 @@ def get_payload(self) -> dict[str, Any]:
 
     def _collect_potential_test_files(self):
         LOOSE_TEST_FILE_PATTERN = r'(\.(test|spec)\.|_test\.|Test\.|Spec\.|test/|tests/|__tests__/|src/test/)'
-        EXCLUDE_PATTERN = r'\.(xml|json|txt|yml|yaml|md)$'
+        EXCLUDE_PATTERN = r'(BUILD|Makefile|Dockerfile|LICENSE|.gitignore|.gitkeep|.keep|id_rsa|rsa|blank|taglib)|\.(xml|json|jsonl|txt|yml|yaml|toml|md|png|jpg|jpeg|gif|svg|sql|html|css|graphql|proto|gz|zip|rz|bzl|conf|config|snap|pem|crt|key|lock|jpi|hpi|jelly|properties|jar|ini|mod|sum|bmp|env|envrc|sh)$'  # noqa E501
 
         try:
             git_managed_files = subprocess.run(['git', 'ls-files'], stdout=subprocess.PIPE,
diff --git a/smart_tests/jar/exe_deploy.jar b/smart_tests/jar/exe_deploy.jar
index 98acb1c06..9a6df6e11 100755
Binary files a/smart_tests/jar/exe_deploy.jar and b/smart_tests/jar/exe_deploy.jar differ
diff --git a/smart_tests/test_runners/bazel.py b/smart_tests/test_runners/bazel.py
index 5fee678a3..e0acb622c 100644
--- a/smart_tests/test_runners/bazel.py
+++ b/smart_tests/test_runners/bazel.py
@@ -1,14 +1,15 @@
 import json
-import os
 import sys
+import xml.etree.ElementTree as ET
+from os import path
 from pathlib import Path
 from typing import Annotated, Generator, List
 
 from junitparser import TestCase, TestSuite  # type: ignore
 
+import smart_tests.args4p.converters as converters
 import smart_tests.args4p.typer as typer
 
-from ..args4p.converters import path
 from ..commands.subset import Subset
 from ..testpath import TestPath
 from ..utils.logger import Logger
@@ -44,7 +45,7 @@ def record_tests(
         "--build-event-json",
         help="set file path generated by --build_event_json_file",
         multiple=True,
-        type=path(exists=True)
+        type=converters.path(exists=True)
     )] = None,
 ):
     """
@@ -64,15 +65,45 @@ def f(case: TestCase, suite: TestSuite, report_file: str) -> TestPath:
 
         # last path component is the target, the rest is package
         # TODO: does this work correctly when on Windows?
-        path = make_test_path(os.path.dirname(pkgNtarget), os.path.basename(pkgNtarget))
+        tp = make_test_path(path.dirname(pkgNtarget), path.basename(pkgNtarget))
 
         # let the normal path building kicks in
-        path.extend(default_path_builder(case, suite, report_file))
-        return path
+        tp.extend(default_path_builder(case, suite, report_file))
+        return tp
 
     client.path_builder = f
     client.check_timestamp = False
 
+    def parse_func(report: str) -> ET.ElementTree:
+        """
+        test result XML generated by Bazel's java_test rule (and possibly others) do not capture system-out/system-err.
+        The whole thing gets captured separately as test.log file in the same directory as test.xml.
+        This limits our ability to do useful things with data, so we go out of our way to capture it.
+        Ideally Bazel should do this. test.log captures the entire output from the whole session, and therefore
+        it is incapable of splitting log between different test classes.
+        """
+        tree = ET.parse(report)
+        root = tree.getroot()
+        for ts in root.findall('testsuite'):
+            # be defensive -- future/non-Java test rules might capture those
+            if ts.findtext('system-out', '') == "" and ts.findtext('system-err', '') == "":
+                logfile = path.join(path.dirname(report), 'test.log')
+                if path.exists(logfile):
+                    print("Found {}".format(logfile))
+                    for x in ts.findall('system-out'):
+                        ts.remove(x)
+                    with open(logfile, 'r', encoding='utf-8', errors='replace') as log_file:
+                        log = log_file.read()
+
+                    for c in ts.findall('testcase'):
+                        if c.findtext('system-out', '') == "" and c.findtext('system-err', '') == "":
+                            system_out = ET.SubElement(c, 'system-out')
+                            system_out.text = log
+
+        return tree  # type: ignore
+
+    client.junitxml_parse_func = parse_func
+
     if build_event_json_files:
         for l in parse_build_event_json(build_event_json_files):
             if l is None:
diff --git a/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java b/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java
index 9b75bea2d..e94b6452c 100644
--- a/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java
+++ b/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java
@@ -89,6 +89,8 @@ public class CommitGraphCollector {
 
   private int maxDays;
 
+  private boolean reportAllFiles;
+
   private boolean audit;
 
   private boolean dryRun;
@@ -150,7 +152,7 @@ public void transfer(URL service, Authenticator authenticator, boolean enableTim
       }
       CloseableHttpResponse latestResponse = client.execute(new HttpGet(latestUrl.toExternalForm()));
       ImmutableList<ObjectId> advertised = getAdvertisedRefs(handleError(latestUrl, latestResponse));
-      honorMaxDaysHeader(latestResponse);
+      honorControlHeaders(latestResponse);
 
       // every time a new stream is needed, supply ByteArrayOutputStream, and when the data is all
       // written, turn around and ship that over
@@ -301,16 +303,21 @@ public void accept(VirtualFile f) {
     }
   }
 
-  /**
-   * When a user incorrectly configures shallow clone, the incremental nature of commit collection
-   * makes it really hard for us and users to collaboratively reset and repopulate the commit data.
-   * This server-side override mechanism makes it easier.
-   */
-  private void honorMaxDaysHeader(HttpResponse response) {
+  private void honorControlHeaders(HttpResponse response) {
+    // When a user incorrectly configures shallow clone, the incremental nature of commit collection
+    // makes it really hard for us and users to collaboratively reset and repopulate the commit data.
+    // This server-side override mechanism makes it easier.
     Header h = response.getFirstHeader("X-Max-Days");
     if (h!=null) {
       maxDays = Integer.parseInt(h.getValue());
     }
+    // File transfer is supposed to work incrementally, but we are investigating the problem where
+    // not all files get collected prior to commit collection, resulting in incomplete data on the server side.
+    // As a temporary mitigation, allow the server to request all files to be reported.
+    h = response.getFirstHeader("X-Report-All-Files");
+    if (h!=null) {
+      reportAllFiles = true;
+    }
   }
 
   private ImmutableList<ObjectId> getAdvertisedRefs(HttpResponse response) throws IOException {
@@ -427,16 +434,22 @@ public void transfer(Collection<ObjectId> advertised, Consumer<JSCommit> commitR
         // and it drastically cuts down the initial commit consumption of a new large repository.
         // ... except we do want to capture the head commit, as that makes it easier to spot integration problems
         // when `record build` and `record commit` are separated.
+
+        // two RevFilters are order sensitive. This is because CommitTimeRevFilter.after doesn't return false to
+        // filter things out, it throws StopWalkException to terminate the walk, never giving a chance for the other
+        // branch of OR to be evaluated. So we need to put ObjectRevFilter first.
         walk.setRevFilter(
             OrRevFilter.create(
-                CommitTimeRevFilter.after(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(maxDays)),
-                new ObjectRevFilter(headId)));
+              new ObjectRevFilter(headId),
+              CommitTimeRevFilter.after(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(maxDays))));
 
         for (ObjectId id : advertised) {
           try {
             RevCommit c = walk.parseCommit(id);
             walk.markUninteresting(c);
-            treeWalk.addTree(c.getTree());
+            if (!reportAllFiles) {
+              treeWalk.addTree(c.getTree());
+            }
           } catch (MissingObjectException e) {
             // it's possible that the server advertises a commit we don't have.
             //
diff --git a/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.log b/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.log
new file mode 100644
index 000000000..587889be1
--- /dev/null
+++ b/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.log
@@ -0,0 +1 @@
+Dummy standard output
diff --git a/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.xml b/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.xml
index c1cd39693..5aba9e3b5 100644
--- a/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.xml
+++ b/tests/data/bazel/bazel-testlogs/src/test/java/com/ninjinkun/mylib_test1/test.xml
@@ -1,7 +1,9 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <testsuites>
-  <testsuite name='com.ninjinkun.MyLibTest1' timestamp='2020-12-16T03:11:43.526Z' hostname='localhost' tests='1' failures='0' errors='0' time='2.02' package='' id='0'>
+  <testsuite name='com.ninjinkun.MyLibTest1' timestamp='2020-12-16T03:11:43.526Z' hostname='localhost' tests='1' failures='1' errors='0' time='2.02' package='' id='0'>
     <properties />
-    <testcase name='testFibonacci' classname='com.ninjinkun.MyLibTest1' time='2.02' />
+    <testcase name='testFibonacci' classname='com.ninjinkun.MyLibTest1' time='2.02'>
+      <failure message='Yo!' type='java.lang.Exception'>java.lang.Exception: Yo!
+        at somewhere(Somewhere.java:33)</failure></testcase>
     <system-out />
     <system-err /></testsuite></testsuites>
diff --git a/tests/data/bazel/record_test_result.json b/tests/data/bazel/record_test_result.json
index 0d8345d6d..b433b2efd 100644
--- a/tests/data/bazel/record_test_result.json
+++ b/tests/data/bazel/record_test_result.json
@@ -23,9 +23,9 @@
         { "type": "testcase", "name": "testFibonacci" }
       ],
       "duration": 2.02,
-      "status": 1,
-      "stdout": "",
-      "stderr": "",
+      "status": 0,
+      "stdout": "Dummy standard output\n",
+      "stderr": "java.lang.Exception: Yo!\n        at somewhere(Somewhere.java:33)",
       "data": null
     },
     {
diff --git a/tests/data/bazel/record_test_with_build_event_json_result.json b/tests/data/bazel/record_test_with_build_event_json_result.json
index 1afb8e2bd..033eeffb8 100644
--- a/tests/data/bazel/record_test_with_build_event_json_result.json
+++ b/tests/data/bazel/record_test_with_build_event_json_result.json
@@ -9,9 +9,9 @@
         { "type": "testcase", "name": "testFibonacci" }
       ],
       "duration": 2.02,
-      "status": 1,
-      "stdout": "",
-      "stderr": "",
+      "status": 0,
+      "stdout": "Dummy standard output\n",
+      "stderr": "java.lang.Exception: Yo!\n        at somewhere(Somewhere.java:33)",
       "data": null
     },
     {
diff --git a/tests/data/bazel/record_test_with_multiple_build_event_json_result.json b/tests/data/bazel/record_test_with_multiple_build_event_json_result.json
index 073f2b4ca..88ac14db9 100644
--- a/tests/data/bazel/record_test_with_multiple_build_event_json_result.json
+++ b/tests/data/bazel/record_test_with_multiple_build_event_json_result.json
@@ -9,9 +9,9 @@
         { "type": "testcase", "name": "testFibonacci" }
       ],
       "duration": 2.02,
-      "status": 1,
-      "stdout": "",
-      "stderr": "",
+      "status": 0,
+      "stdout": "Dummy standard output\n",
+      "stderr": "java.lang.Exception: Yo!\n        at somewhere(Somewhere.java:33)",
       "data": null
     },
     {