lanl · mikegros · Feb 5, 2026 · Feb 4, 2026
diff --git a/examples/single_agent_examples/hypothesizer_agent/README.md b/examples/single_agent_examples/hypothesizer_agent/README.md
@@ -10,9 +10,8 @@ On a mac you need:
 ```
 brew update
 brew install ocrmypdf tesseract
-# NOTE: Feb 1, 2026 - gettext did not install on my mac so had to
-#       build from source, this is LENGTHY process, but 100%
-#       works:
+# NOTE: Feb 1, 2026 - gettext did not install on my Mac, so I had to
+#       build from source. This is a LENGTHY but reliable process:
 #       brew install --build-from-source gettext
 #       once gettext is installed, you can go back to
 #       brew install ocrmypdf
@@ -31,8 +30,9 @@ Note that the first `[OCR]` line will only show up if the PDF reading fails and
 are no text layers discovered (this `skips` some complex / lengthy OCR techniques
 and tries a quick and dirty one.).
 
-Note that the second `[OCR]` line will only show up if the `skip` version
-still produced no good data to read, this is called the `force` version.
+Note that the second `[OCR]` line will show up only if the `skip` version
+still produced no readable text. This second pass is called the `force` version.
 
-Once a doc has been OCRed (either version) the reader will pick this up automatically
-in the future (ie it will only run this the first time it needs to).
+Once a doc has been OCRed (either version), the reader will automatically
+reuse the OCRed result in the future (i.e., OCR will run only the first time
+it is needed).
diff --git a/src/ursa/tools/read_file_tool.py b/src/ursa/tools/read_file_tool.py
@@ -1,29 +1,41 @@
 import os
+import shutil
 import subprocess
 from pathlib import Path
 
 from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
+from pypdf import PdfReader
 
 from ursa.agents.base import AgentContext
 from ursa.util.parse import read_pdf_text, read_text_file
 
 
-def _pdf_page_count(path: str) -> int:
+def _pdf_page_count(path: Path) -> int:
     try:
-        from pypdf import PdfReader
-
         return len(PdfReader(path).pages)
-    except Exception:
+    except Exception as e:
+        print("[Error]: ", e)
         return 0
 
 
+def ocrmypdf_is_installed() -> bool:
+    return shutil.which("ocrmypdf") is not None
+
+
 def _ocr_to_searchable_pdf(
     src_pdf: str, out_pdf: str, *, mode: str = "skip"
 ) -> None:
     # mode:
     #  - "skip":  only OCR pages that look like they need it (your current behavior)
     #  - "force": rasterize + OCR everything (fixes vector/outlined “no images” PDFs)
+    if not ocrmypdf_is_installed():
+        raise ImportError(
+            "ocrmypdf was not found in your path. "
+            "See installation instructions:"
+            "https://github.com/ocrmypdf/OCRmyPDF?tab=readme-ov-file#installation"
+        )
+
     cmd = ["ocrmypdf", "--rotate-pages", "--deskew", "--clean"]
 
     if mode == "force":