diff --git a/src/lmstudio/async_api.py b/src/lmstudio/async_api.py index 028ced4..ad2c831 100644 --- a/src/lmstudio/async_api.py +++ b/src/lmstudio/async_api.py @@ -34,8 +34,8 @@ from .history import ( Chat, ChatHistoryDataDict, - _FileHandle, - _FileInputType, + FileHandle, + _FileCacheInputType, _LocalFileData, ) from .json_api import ( @@ -581,18 +581,18 @@ class _AsyncSessionFiles(AsyncSession): API_NAMESPACE = "files" - async def _fetch_file_handle(self, file_data: _LocalFileData) -> _FileHandle: + async def _fetch_file_handle(self, file_data: _LocalFileData) -> FileHandle: handle = await self.remote_call("uploadFileBase64", file_data._as_fetch_param()) # Returned dict provides the handle identifier, file type, and size in bytes # Add the extra fields needed for a FileHandle (aka ChatMessagePartFileData) handle["name"] = file_data.name handle["type"] = "file" - return load_struct(handle, _FileHandle) + return load_struct(handle, FileHandle) @sdk_public_api_async() async def _add_temp_file( - self, src: _FileInputType, name: str | None = None - ) -> _FileHandle: + self, src: _FileCacheInputType, name: str | None = None + ) -> FileHandle: """Add a file to the server.""" # Private until LM Studio file handle support stabilizes file_data = _LocalFileData(src, name) @@ -795,7 +795,7 @@ async def _load_new_instance( on_load_progress: ModelLoadingCallback | None, ) -> TAsyncModelHandle: channel_type = self._API_TYPES.REQUEST_NEW_INSTANCE - config_type = self._API_TYPES.MODEL_LOAD_CONFIG + config_type: type[TLoadConfig] = self._API_TYPES.MODEL_LOAD_CONFIG endpoint = LoadModelEndpoint( model_key, instance_identifier, @@ -847,7 +847,7 @@ async def list_downloaded(self) -> Sequence[TAsyncDownloadedModel]: models = await self._system_session.list_downloaded_models() return [m for m in models if self._is_relevant_model(m)] - async def _fetch_file_handle(self, file_data: _LocalFileData) -> _FileHandle: + async def _fetch_file_handle(self, file_data: _LocalFileData) -> FileHandle: return await self._files_session._fetch_file_handle(file_data) @@ -1054,7 +1054,6 @@ async def _respond_stream( """Request a response in an ongoing assistant chat session and stream the generated tokens.""" if not isinstance(history, Chat): history = Chat.from_history(history) - await history._fetch_file_handles_async(self._fetch_file_handle) endpoint = ChatResponseEndpoint( model_specifier, history, @@ -1078,7 +1077,6 @@ async def _apply_prompt_template( """Apply a prompt template to the given history.""" if not isinstance(history, Chat): history = Chat.from_history(history) - await history._fetch_file_handles_async(self._fetch_file_handle) if not isinstance(opts, LlmApplyPromptTemplateOpts): opts = LlmApplyPromptTemplateOpts.from_dict(opts) params = LlmRpcApplyPromptTemplateParameter._from_api_dict( @@ -1505,8 +1503,8 @@ def repository(self) -> AsyncSessionRepository: # Convenience methods @sdk_public_api_async() async def _add_temp_file( - self, src: _FileInputType, name: str | None = None - ) -> _FileHandle: + self, src: _FileCacheInputType, name: str | None = None + ) -> FileHandle: """Add a file to the server.""" # Private until LM Studio file handle support stabilizes return await self._files._add_temp_file(src, name) diff --git a/src/lmstudio/history.py b/src/lmstudio/history.py index cb77961..1913253 100644 --- a/src/lmstudio/history.py +++ b/src/lmstudio/history.py @@ -43,54 +43,45 @@ ChatMessageDataSystem as SystemPrompt, ChatMessageDataAssistant as AssistantResponse, ChatMessageDataTool as ToolResultMessage, - 
# Private until LM Studio file handle support stabilizes - ChatMessagePartFileData as _FileHandle, - ChatMessagePartFileDataDict as _FileHandleDict, + ChatMessagePartFileData as FileHandle, + ChatMessagePartFileDataDict as FileHandleDict, ChatMessagePartTextData as TextData, ChatMessagePartTextDataDict as TextDataDict, ChatMessagePartToolCallRequestData as ToolCallRequestData, ChatMessagePartToolCallRequestDataDict as ToolCallRequestDataDict, ChatMessagePartToolCallResultData as ToolCallResultData, ChatMessagePartToolCallResultDataDict as ToolCallResultDataDict, - # Private until LM Studio file handle support stabilizes - # FileType, FilesRpcUploadFileBase64Parameter, + FileType as FileHandleType, ToolCallRequest as ToolCallRequest, FunctionToolCallRequestDict as ToolCallRequestDict, ) __all__ = [ + "AnyChatMessage", + "AnyChatMessageDict", + "AssistantResponse", "AssistantResponseContent", "Chat", "ChatHistoryData", "ChatHistoryDataDict", - "AnyChatMessage", - "AnyChatMessageDict", - "UserMessage", - "SystemPrompt", - "AssistantResponse", - "ToolResultMessage", - # Private until LM Studio file handle support stabilizes - "_FileHandle", # Other modules need this to be exported - "_FileHandleDict", # Other modules need this to be exported - # "FileType", - # "FileHandle", - # "FileHandleDict", + # Private until file handle caching support is part of the published SDK API # "FetchFileHandle", + "FileHandle", + "FileHandleDict", + "FileHandleInput", + "FileHandleType", + "SystemPrompt", "SystemPromptContent", - "TextData", - "TextDataDict", - # Private until user level tool call request management is defined "ToolCallRequest", "ToolCallResultData", - # "ToolCallRequest", - # "ToolCallResult", + "TextData", + "TextDataDict", + "ToolResultMessage", + "UserMessage", "UserMessageContent", ] -# Private until LM Studio file handle support stabilizes -_FileInputType = BinaryIO | bytes | str | os.PathLike[str] - # A note on terminology: # # In the chat history API, "prompt" specifically refers to "system prompts", @@ -103,17 +94,19 @@ # * when applying prompt templates, the entire resulting chat context history # is referred to as the "chat prompt" +FileHandleInput = FileHandle | FileHandleDict + # Note: ChatMessageDataSystem nominally allows file handles in its content field, # but that's only for internal use within the LM Studio plugin system SystemPromptContent = TextData SystemPromptContentDict = TextDataDict -UserMessageContent = TextData | _FileHandle -UserMessageContentDict = TextDataDict | _FileHandleDict -AssistantResponseContent = TextData | _FileHandle -AssistantResponseContentDict = TextDataDict | _FileHandleDict -ChatMessageContent = TextData | _FileHandle | ToolCallRequestData | ToolCallResultData +UserMessageContent = TextData | FileHandle +UserMessageContentDict = TextDataDict | FileHandleDict +AssistantResponseContent = TextData | FileHandle +AssistantResponseContentDict = TextDataDict | FileHandleDict +ChatMessageContent = TextData | FileHandle | ToolCallRequestData | ToolCallResultData ChatMessageContentDict = ( - TextDataDict | _FileHandleDict | ToolCallRequestData | ToolCallResultDataDict + TextDataDict | FileHandleDict | ToolCallRequestData | ToolCallResultDataDict ) @@ -152,66 +145,6 @@ def _is_chat_message_input(value: AnyChatMessageInput) -> TypeIs[ChatMessageInpu return isinstance(value, (str, Mapping)) or not isinstance(value, Iterable) -def _get_file_details(src: _FileInputType) -> Tuple[str, bytes]: - """Read file contents as binary data and generate a suitable default 
name.""" - if isinstance(src, bytes): - # We interpreter bytes as raw data, not a bytes filesystem path - data = src - name = str(uuid.uuid4()) - elif hasattr(src, "read"): - try: - data = src.read() - except OSError as exc: - err_msg = f"Error while reading {src!r} ({exc!r})" - raise LMStudioOSError(err_msg) from None - name = getattr(src, "name", str(uuid.uuid4())) - else: - try: - src_path = Path(src) - except Exception as exc: - err_msg = f"Expected file-like object, filesystem path, or bytes ({exc!r})" - raise LMStudioValueError(err_msg) from None - try: - data = src_path.read_bytes() - except OSError as exc: - err_msg = f"Error while reading {str(src_path)!r} ({exc!r})" - raise LMStudioOSError(err_msg) from None - name = str(src_path.name) - return name, data - - -_ContentHash: TypeAlias = bytes -_FileHandleCacheKey: TypeAlias = tuple[str, _ContentHash] - - -# Private until LM Studio file handle support stabilizes -class _LocalFileData: - """Local file data to be added to a chat session.""" - - name: str - raw_data: bytes - - def __init__(self, src: _FileInputType, name: str | None = None) -> None: - default_name, raw_data = _get_file_details(src) - self.name = name or default_name - self.raw_data = raw_data - - def _get_cache_key(self) -> _FileHandleCacheKey: - return (self.name, sha256(self.raw_data).digest()) - - def _as_fetch_param(self) -> FilesRpcUploadFileBase64Parameter: - content_base64 = b64encode(self.raw_data).decode("ascii") - return FilesRpcUploadFileBase64Parameter( - name=self.name, content_base64=content_base64 - ) - - -_PendingFile: TypeAlias = tuple[_LocalFileData, _FileHandle] - -_FetchFileHandle: TypeAlias = Callable[[_LocalFileData], _FileHandle] -_AsyncFetchFileHandle: TypeAlias = Callable[[_LocalFileData], Awaitable[_FileHandle]] - - class Chat: """Helper class to track LLM interactions.""" @@ -234,8 +167,6 @@ def __init__( self._history = _initial_history else: self._history = ChatHistoryData(messages=[]) - self._pending_files: dict[_FileHandleCacheKey, _PendingFile] = {} - self._cached_file_handles: dict[_FileHandleCacheKey, _FileHandle] = {} if initial_prompt is not None: self.add_system_prompt(initial_prompt) @@ -245,63 +176,21 @@ def _messages(self) -> MutableSequence[AnyChatMessage]: def __str__(self) -> str: type_name = type(self).__name__ - formatted_data = _format_json(self._get_history_unchecked()) + formatted_data = _format_json(self._get_history()) return f"{type_name}.from_history({formatted_data})" - def _get_history_unchecked(self) -> ChatHistoryDataDict: - # Convert the history without checking for pending files + def _get_history(self) -> ChatHistoryDataDict: return cast(ChatHistoryDataDict, to_builtins(self._history)) def _get_history_for_prediction(self) -> ChatHistoryDataDict: """Convert the current history to a format suitable for an LLM prediction.""" - if self._pending_files: - # If this happens, something elsewhere in the SDK messed up - # Raise a standard exception so the traceback doesn't get truncated - raise RuntimeError( - "Pending file handles must be fetched before requesting an LLM prediction" - ) - return self._get_history_unchecked() + # For a wire message, we want the dict format + return self._get_history() def _get_history_for_copy(self) -> ChatHistoryData: """Convert the current history to a format suitable for initializing a new instance.""" - if self._pending_files: - # Users can trigger this without the SDK doing anything wrong, - # so truncate the reported traceback at the SDK boundary - raise LMStudioRuntimeError( - 
"Cannot copy chat history with pending file handles" - ) - return ChatHistoryData._from_api_dict(self._get_history_unchecked()) - - def _get_pending_files_to_fetch(self) -> Mapping[_FileHandleCacheKey, _PendingFile]: - pending_files = self._pending_files - self._pending_files = {} - return pending_files - - @staticmethod - def _update_pending_handle( - pending_handle: _FileHandle, fetched_handle: _FileHandle - ) -> None: - # Mutate the pending handle so it keeps its place in the history - for attr in pending_handle.__struct_fields__: - setattr(pending_handle, attr, getattr(fetched_handle, attr)) - - def _fetch_file_handles(self, fetch_file_handle: _FetchFileHandle) -> None: - """Synchronously fetch all currently pending file handles from the LM Studio API.""" - pending_files = self._get_pending_files_to_fetch() - for cache_key, (file_data, pending_handle) in pending_files.items(): - fetched_handle = fetch_file_handle(file_data) - self._update_pending_handle(pending_handle, fetched_handle) - self._cached_file_handles[cache_key] = fetched_handle - - async def _fetch_file_handles_async( - self, fetch_file_handle: _AsyncFetchFileHandle - ) -> None: - """Asynchronously fetch all currently pending file handles from the LM Studio API.""" - pending_files = self._get_pending_files_to_fetch() - for cache_key, (file_data, pending_handle) in pending_files.items(): - fetched_handle = await fetch_file_handle(file_data) - self._update_pending_handle(pending_handle, fetched_handle) - self._cached_file_handles[cache_key] = fetched_handle + # For a new chat instance, we want struct instances + return ChatHistoryData._from_api_dict(self._get_history()) @classmethod @sdk_public_api() @@ -317,10 +206,7 @@ def from_history( """ if isinstance(history, cls): # Create a new `cls` instance with the same history as the given chat - self = cls(_initial_history=history._get_history_for_copy()) - # Retrieving the history would fail if there were pending file handles - self._cached_file_handles.update(history._cached_file_handles) - return self + return cls(_initial_history=history._get_history_for_copy()) if isinstance(history, ChatHistoryData): # Ensure the chat is not affected by future mutation of the given history return cls(_initial_history=deepcopy(history)) @@ -479,48 +365,15 @@ def add_system_prompt(self, prompt: SystemPromptInput) -> SystemPrompt: self._messages.append(message) return message - def _get_file_handle( - self, src: _FileInputType, name: str | None = None - ) -> _FileHandle: - file_data = _LocalFileData(src, name) - cache_key = file_data._get_cache_key() - try: - # Check if file handle has already been fetched - return self._cached_file_handles[cache_key] - except KeyError: - pass - try: - # Check if file handle already has a fetch pending - pending_file = self._pending_files[cache_key] - return pending_file[1] - except KeyError: - pass - # Create a new pending file handle - to_be_populated = _FileHandle( - name=file_data.name, - identifier="", - size_bytes=-1, # Let the fetch operation set this later - file_type="unknown", - ) - self._pending_files[cache_key] = (file_data, to_be_populated) - return to_be_populated - - @sdk_public_api() - def _add_file(self, src: _FileInputType, name: str | None = None) -> UserMessage: - """Add a local file (or raw binary data) to the chat history.""" - # Private until LM Studio file handle support stabilizes - file_handle = self._get_file_handle(src, name) - return self.add_user_message(file_handle) - @sdk_public_api() def add_user_message( self, content: 
UserMessageInput | Iterable[UserMessageInput], *, + images: Sequence[FileHandleInput] = (), # Mark file parameters as private until LM Studio # file handle support stabilizes - _images: Sequence[_FileInputType] = (), - _files: Sequence[_FileInputType] = (), + _files: Sequence[FileHandleInput] = (), ) -> UserMessage: """Add a new user message to the chat history.""" # Accept both singular and multi-part user messages @@ -530,10 +383,10 @@ def add_user_message( else: content_items = list(content) # Convert given local file information to file handles - if _images: - content_items.extend(self._get_file_handle(f) for f in _images) + if images: + content_items.extend(images) if _files: - content_items.extend(self._get_file_handle(f) for f in _files) + content_items.extend(_files) # Consecutive messages with the same role are not supported, # but multi-part user messages are valid (to allow for file # attachments), so just merge them @@ -547,7 +400,7 @@ def add_user_message( match item: # Sadly, we can't use the union type aliases for matching, # since the compiler needs visibility into every match target - case TextData() | _FileHandle(): + case TextData() | FileHandle(): _content.append(item) case str(): _content.append(TextData(text=item)) @@ -559,7 +412,7 @@ def add_user_message( "file_type": _, }: # We accept snake_case here for consistency, but don't really expect it - _content.append(_FileHandle._from_any_dict(item)) + _content.append(FileHandle._from_any_dict(item)) case _: raise LMStudioValueError( f"Unable to parse user message content: {item}" @@ -573,14 +426,14 @@ def add_user_message( @classmethod def _parse_assistant_response( cls, response: AnyAssistantResponseInput - ) -> TextData | _FileHandle: + ) -> TextData | FileHandle: # Note: tool call requests are NOT accepted here, as they're expected # to follow an initial text response # It's not clear if file handles should be accepted as it's not obvious # how client applications should process those (even though the API # format nominally permits them here) match response: - case TextData() | _FileHandle(): + case TextData() | FileHandle(): return response case str(): return TextData(text=response) @@ -594,7 +447,7 @@ def _parse_assistant_response( "file_type": _, }: # We accept snake_case here for consistency, but don't really expect it - return _FileHandle._from_any_dict(response) + return FileHandle._from_any_dict(response) case _: raise LMStudioValueError( f"Unable to parse assistant response content: {response}" @@ -664,3 +517,135 @@ def add_tool_result(self, result: ToolCallResultInput) -> ToolResultMessage: message = ToolResultMessage(content=[message_data]) self._messages.append(message) return message + + +# Private until file handle caching support is part of the published SDK API +_FileCacheInputType = BinaryIO | bytes | str | os.PathLike[str] + + +def _get_file_details(src: _FileCacheInputType) -> Tuple[str, bytes]: + """Read file contents as binary data and generate a suitable default name.""" + if isinstance(src, bytes): + # We interpreter bytes as raw data, not a bytes filesystem path + data = src + name = str(uuid.uuid4()) + elif hasattr(src, "read"): + try: + data = src.read() + except OSError as exc: + err_msg = f"Error while reading {src!r} ({exc!r})" + raise LMStudioOSError(err_msg) from None + name = getattr(src, "name", str(uuid.uuid4())) + else: + try: + src_path = Path(src) + except Exception as exc: + err_msg = f"Expected file-like object, filesystem path, or bytes ({exc!r})" + raise 
LMStudioValueError(err_msg) from None + try: + data = src_path.read_bytes() + except OSError as exc: + err_msg = f"Error while reading {str(src_path)!r} ({exc!r})" + raise LMStudioOSError(err_msg) from None + name = str(src_path.name) + return name, data + + +_ContentHash: TypeAlias = bytes +_FileHandleCacheKey: TypeAlias = tuple[str, _ContentHash] + + +# Private until file handle caching support is part of the published SDK API +class _LocalFileData: + """Local file data to be added to a chat history.""" + + name: str + raw_data: bytes + + def __init__(self, src: _FileCacheInputType, name: str | None = None) -> None: + default_name, raw_data = _get_file_details(src) + self.name = name or default_name + self.raw_data = raw_data + + def _get_cache_key(self) -> _FileHandleCacheKey: + return (self.name, sha256(self.raw_data).digest()) + + def _as_fetch_param(self) -> FilesRpcUploadFileBase64Parameter: + content_base64 = b64encode(self.raw_data).decode("ascii") + return FilesRpcUploadFileBase64Parameter( + name=self.name, content_base64=content_base64 + ) + + +_PendingFile: TypeAlias = tuple[_LocalFileData, FileHandle] + +_FetchFileHandle: TypeAlias = Callable[[_LocalFileData], FileHandle] +_AsyncFetchFileHandle: TypeAlias = Callable[[_LocalFileData], Awaitable[FileHandle]] + + +# TODO: Now that the file handle caching is no longer part of the chat history management, +# redesign it to resolve file handles with the server immediately. +class _FileHandleCache: + """Local file data to be added to a chat session.""" + + def __init__(self) -> None: + self._pending_files: dict[_FileHandleCacheKey, _PendingFile] = {} + self._cached_file_handles: dict[_FileHandleCacheKey, FileHandle] = {} + + @sdk_public_api() + def _get_file_handle( + self, src: _FileCacheInputType, name: str | None = None + ) -> FileHandle: + file_data = _LocalFileData(src, name) + cache_key = file_data._get_cache_key() + try: + # Check if file handle has already been fetched + return self._cached_file_handles[cache_key] + except KeyError: + pass + try: + # Check if file handle already has a fetch pending + pending_file = self._pending_files[cache_key] + return pending_file[1] + except KeyError: + pass + # Create a new pending file handle + to_be_populated = FileHandle( + name=file_data.name, + identifier="", + size_bytes=-1, # Let the fetch operation set this later + file_type="unknown", + ) + self._pending_files[cache_key] = (file_data, to_be_populated) + return to_be_populated + + def _get_pending_files_to_fetch(self) -> Mapping[_FileHandleCacheKey, _PendingFile]: + pending_files = self._pending_files + self._pending_files = {} + return pending_files + + @staticmethod + def _update_pending_handle( + pending_handle: FileHandle, fetched_handle: FileHandle + ) -> None: + # Mutate the pending handle so it keeps its place in the history + for attr in pending_handle.__struct_fields__: + setattr(pending_handle, attr, getattr(fetched_handle, attr)) + + def _fetch_file_handles(self, fetch_file_handle: _FetchFileHandle) -> None: + """Synchronously fetch all currently pending file handles from the LM Studio API.""" + pending_files = self._get_pending_files_to_fetch() + for cache_key, (file_data, pending_handle) in pending_files.items(): + fetched_handle = fetch_file_handle(file_data) + self._update_pending_handle(pending_handle, fetched_handle) + self._cached_file_handles[cache_key] = fetched_handle + + async def _fetch_file_handles_async( + self, fetch_file_handle: _AsyncFetchFileHandle + ) -> None: + """Asynchronously fetch all currently 
pending file handles from the LM Studio API.""" + pending_files = self._get_pending_files_to_fetch() + for cache_key, (file_data, pending_handle) in pending_files.items(): + fetched_handle = await fetch_file_handle(file_data) + self._update_pending_handle(pending_handle, fetched_handle) + self._cached_file_handles[cache_key] = fetched_handle diff --git a/src/lmstudio/sync_api.py b/src/lmstudio/sync_api.py index ca7b8d3..358eda0 100644 --- a/src/lmstudio/sync_api.py +++ b/src/lmstudio/sync_api.py @@ -52,8 +52,8 @@ ToolResultMessage, Chat, ChatHistoryDataDict, - _FileHandle, - _FileInputType, + FileHandle, + _FileCacheInputType, _LocalFileData, ToolCallRequest, ) @@ -756,18 +756,18 @@ class _SyncSessionFiles(SyncSession): API_NAMESPACE = "files" - def _fetch_file_handle(self, file_data: _LocalFileData) -> _FileHandle: + def _fetch_file_handle(self, file_data: _LocalFileData) -> FileHandle: handle = self.remote_call("uploadFileBase64", file_data._as_fetch_param()) # Returned dict provides the handle identifier, file type, and size in bytes # Add the extra fields needed for a FileHandle (aka ChatMessagePartFileData) handle["name"] = file_data.name handle["type"] = "file" - return load_struct(handle, _FileHandle) + return load_struct(handle, FileHandle) @sdk_public_api() def _add_temp_file( - self, src: _FileInputType, name: str | None = None - ) -> _FileHandle: + self, src: _FileCacheInputType, name: str | None = None + ) -> FileHandle: """Add a file to the server.""" # Private until LM Studio file handle support stabilizes file_data = _LocalFileData(src, name) @@ -1008,7 +1008,7 @@ def list_downloaded(self) -> Sequence[TDownloadedModel]: models = self._system_session.list_downloaded_models() return [m for m in models if self._is_relevant_model(m)] - def _fetch_file_handle(self, file_data: _LocalFileData) -> _FileHandle: + def _fetch_file_handle(self, file_data: _LocalFileData) -> FileHandle: return self._files_session._fetch_file_handle(file_data) @@ -1217,7 +1217,6 @@ def _respond_stream( """Request a response in an ongoing assistant chat session and stream the generated tokens.""" if not isinstance(history, Chat): history = Chat.from_history(history) - history._fetch_file_handles(self._fetch_file_handle) endpoint = ChatResponseEndpoint( model_specifier, history, @@ -1241,7 +1240,6 @@ def _apply_prompt_template( """Apply a prompt template to the given history.""" if not isinstance(history, Chat): history = Chat.from_history(history) - history._fetch_file_handles(self._fetch_file_handle) if not isinstance(opts, LlmApplyPromptTemplateOpts): opts = LlmApplyPromptTemplateOpts.from_dict(opts) params = LlmRpcApplyPromptTemplateParameter._from_api_dict( @@ -1573,10 +1571,8 @@ def act( start_time = time.perf_counter() # It is not yet possible to combine tool calling with requests for structured responses response_format = None - if isinstance(chat, Chat): - chat._fetch_file_handles(self._session._fetch_file_handle) agent_chat: Chat = Chat.from_history(chat) - del chat + del chat # Avoid any further access to the input chat history # Multiple rounds, until all tool calls are resolved or limit is reached round_counter: Iterable[int] if max_prediction_rounds is not None: @@ -1825,8 +1821,8 @@ def repository(self) -> SyncSessionRepository: # Convenience methods @sdk_public_api() def _add_temp_file( - self, src: _FileInputType, name: str | None = None - ) -> _FileHandle: + self, src: _FileCacheInputType, name: str | None = None + ) -> FileHandle: """Add a file to the server.""" # Private until LM Studio 
file handle support stabilizes return self._files._add_temp_file(src, name) @@ -1899,7 +1895,7 @@ def embedding_model( @sdk_public_api() -def _add_temp_file(src: _FileInputType, name: str | None = None) -> _FileHandle: +def _add_temp_file(src: _FileCacheInputType, name: str | None = None) -> FileHandle: """Add a file to the server using the default global client.""" # Private until LM Studio file handle support stabilizes return get_default_client()._add_temp_file(src, name) diff --git a/tests/async/test_images_async.py b/tests/async/test_images_async.py index 2f2cba8..69f178b 100644 --- a/tests/async/test_images_async.py +++ b/tests/async/test_images_async.py @@ -7,7 +7,7 @@ from io import BytesIO -from lmstudio import AsyncClient, Chat, _FileHandle, LMStudioServerError +from lmstudio import AsyncClient, Chat, FileHandle, LMStudioServerError from ..support import ( EXPECTED_VLM_ID, @@ -26,7 +26,7 @@ async def test_upload_from_pathlike_async(caplog: LogCap) -> None: session = client._files file = await session._add_temp_file(IMAGE_FILEPATH) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -39,7 +39,7 @@ async def test_upload_from_file_obj_async(caplog: LogCap) -> None: with open(IMAGE_FILEPATH, "rb") as f: file = await session._add_temp_file(f) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -52,7 +52,7 @@ async def test_upload_from_bytesio_async(caplog: LogCap) -> None: with open(IMAGE_FILEPATH, "rb") as f: file = await session._add_temp_file(BytesIO(f.read())) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -96,15 +96,14 @@ async def test_non_vlm_predict_async(caplog: LogCap) -> None: @pytest.mark.asyncio @pytest.mark.slow @pytest.mark.lmstudio -async def test_vlm_predict_implicit_file_handles_async(caplog: LogCap) -> None: +async def test_vlm_predict_image_param_async(caplog: LogCap) -> None: prompt = VLM_PROMPT caplog.set_level(logging.DEBUG) model_id = EXPECTED_VLM_ID async with AsyncClient() as client: + file_handle = await client._files._add_temp_file(IMAGE_FILEPATH) history = Chat() - history.add_user_message(prompt) - # File handles will be implicitly acquired when preparing the prediction request - history._add_file(IMAGE_FILEPATH) + history.add_user_message(prompt, images=[file_handle]) vlm = await client.llm.model(model_id) response = await vlm.respond(history, config=SHORT_PREDICTION_CONFIG) logging.info(f"VLM response: {response!r}") @@ -117,15 +116,14 @@ async def test_vlm_predict_implicit_file_handles_async(caplog: LogCap) -> None: @pytest.mark.asyncio @pytest.mark.lmstudio -async def test_non_vlm_predict_implicit_file_handles_async(caplog: LogCap) -> None: +async def test_non_vlm_predict_image_param_async(caplog: LogCap) -> None: prompt = VLM_PROMPT caplog.set_level(logging.DEBUG) model_id = "hugging-quants/llama-3.2-1b-instruct" async with AsyncClient() as client: + file_handle = await client._files._add_temp_file(IMAGE_FILEPATH) history = Chat() - history.add_user_message(prompt) - # File handles will be implicitly acquired when preparing the prediction request - history._add_file(IMAGE_FILEPATH) + history.add_user_message(prompt, images=[file_handle]) llm = await client.llm.model(model_id) with pytest.raises(LMStudioServerError) as exc_info: await llm.respond(history) diff --git a/tests/async/test_llm_async.py 
b/tests/async/test_llm_async.py index 727ad94..acb5fbc 100644 --- a/tests/async/test_llm_async.py +++ b/tests/async/test_llm_async.py @@ -15,7 +15,7 @@ @pytest.mark.parametrize("model_id", (EXPECTED_LLM, EXPECTED_LLM_ID)) async def test_apply_prompt_template_async(model_id: str, caplog: LogCap) -> None: caplog.set_level(logging.DEBUG) - file_data = history._FileHandle( + file_data = history.FileHandle( name="someFile.txt", identifier="some-file", size_bytes=100, diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c3850a3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,6 @@ +"""Runtime test suite configuration""" + +import pytest + +# Ensure support module assertions provide failure details +pytest.register_assert_rewrite("tests.support") diff --git a/tests/sync/test_images_sync.py b/tests/sync/test_images_sync.py index 2b085b6..69298a7 100644 --- a/tests/sync/test_images_sync.py +++ b/tests/sync/test_images_sync.py @@ -14,7 +14,7 @@ from io import BytesIO -from lmstudio import Client, Chat, _FileHandle, LMStudioServerError +from lmstudio import Client, Chat, FileHandle, LMStudioServerError from ..support import ( EXPECTED_VLM_ID, @@ -32,7 +32,7 @@ def test_upload_from_pathlike_sync(caplog: LogCap) -> None: session = client._files file = session._add_temp_file(IMAGE_FILEPATH) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -44,7 +44,7 @@ def test_upload_from_file_obj_sync(caplog: LogCap) -> None: with open(IMAGE_FILEPATH, "rb") as f: file = session._add_temp_file(f) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -56,7 +56,7 @@ def test_upload_from_bytesio_sync(caplog: LogCap) -> None: with open(IMAGE_FILEPATH, "rb") as f: file = session._add_temp_file(BytesIO(f.read())) assert file - assert isinstance(file, _FileHandle) + assert isinstance(file, FileHandle) logging.info(f"Uploaded file: {file}") @@ -97,15 +97,14 @@ def test_non_vlm_predict_sync(caplog: LogCap) -> None: @pytest.mark.slow @pytest.mark.lmstudio -def test_vlm_predict_implicit_file_handles_sync(caplog: LogCap) -> None: +def test_vlm_predict_image_param_sync(caplog: LogCap) -> None: prompt = VLM_PROMPT caplog.set_level(logging.DEBUG) model_id = EXPECTED_VLM_ID with Client() as client: + file_handle = client._files._add_temp_file(IMAGE_FILEPATH) history = Chat() - history.add_user_message(prompt) - # File handles will be implicitly acquired when preparing the prediction request - history._add_file(IMAGE_FILEPATH) + history.add_user_message(prompt, images=[file_handle]) vlm = client.llm.model(model_id) response = vlm.respond(history, config=SHORT_PREDICTION_CONFIG) logging.info(f"VLM response: {response!r}") @@ -117,15 +116,14 @@ def test_vlm_predict_implicit_file_handles_sync(caplog: LogCap) -> None: @pytest.mark.lmstudio -def test_non_vlm_predict_implicit_file_handles_sync(caplog: LogCap) -> None: +def test_non_vlm_predict_image_param_sync(caplog: LogCap) -> None: prompt = VLM_PROMPT caplog.set_level(logging.DEBUG) model_id = "hugging-quants/llama-3.2-1b-instruct" with Client() as client: + file_handle = client._files._add_temp_file(IMAGE_FILEPATH) history = Chat() - history.add_user_message(prompt) - # File handles will be implicitly acquired when preparing the prediction request - history._add_file(IMAGE_FILEPATH) + history.add_user_message(prompt, images=[file_handle]) llm = client.llm.model(model_id) with 
pytest.raises(LMStudioServerError) as exc_info: llm.respond(history) diff --git a/tests/sync/test_llm_sync.py b/tests/sync/test_llm_sync.py index df055d5..d0dad09 100644 --- a/tests/sync/test_llm_sync.py +++ b/tests/sync/test_llm_sync.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("model_id", (EXPECTED_LLM, EXPECTED_LLM_ID)) def test_apply_prompt_template_sync(model_id: str, caplog: LogCap) -> None: caplog.set_level(logging.DEBUG) - file_data = history._FileHandle( + file_data = history.FileHandle( name="someFile.txt", identifier="some-file", size_bytes=100, diff --git a/tests/test_history.py b/tests/test_history.py index a34cc4f..bb98c2f 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -7,7 +7,7 @@ import pytest -from lmstudio.sdk_api import LMStudioOSError, LMStudioRuntimeError +from lmstudio.sdk_api import LMStudioOSError from lmstudio.schemas import DictObject from lmstudio.history import ( AnyChatMessageInput, @@ -15,8 +15,10 @@ AnyChatMessageDict, ChatHistoryData, ChatHistoryDataDict, - _FileHandle, - _FileHandleDict, + _FileCacheInputType, + FileHandle, + _FileHandleCache, + FileHandleDict, _LocalFileData, TextData, ) @@ -255,13 +257,13 @@ def test_from_history_with_simple_text() -> None: assert chat._get_history_for_prediction() == expected_history -INPUT_FILE_HANDLE = _FileHandle( +INPUT_FILE_HANDLE = FileHandle( name="someFile.txt", identifier="some-file", size_bytes=100, file_type="text/plain", ) -INPUT_FILE_HANDLE_DICT: _FileHandleDict = { +INPUT_FILE_HANDLE_DICT: FileHandleDict = { "type": "file", "name": "someOtherFile.txt", "identifier": "some-other-file", @@ -377,98 +379,88 @@ def test_add_prediction_results() -> None: assert chat._get_history_for_prediction() == EXPECTED_PREDICTION_RESPONSE_HISTORY -EXPECTED_LOCAL_FILE_MESSAGES = [ +EXPECTED_PENDING_FILE_HANDLES = [ { - "content": [ - { - "fileType": "unknown", - "identifier": "", - "name": "raw-binary.txt", - "sizeBytes": -1, - "type": "file", - }, - { - "fileType": "unknown", - "identifier": "", - "name": "raw-binary.txt", - "sizeBytes": -1, - "type": "file", - }, - { - "fileType": "unknown", - "identifier": "", - "name": "lemmy.png", - "sizeBytes": -1, - "type": "file", - }, - { - "fileType": "unknown", - "identifier": "", - "name": "also-lemmy.png", - "sizeBytes": -1, - "type": "file", - }, - { - "fileType": "unknown", - "identifier": "", - "name": "lemmy.png", - "sizeBytes": -1, - "type": "file", - }, - ], - "role": "user", + "fileType": "unknown", + "identifier": "", + "name": "raw-binary.txt", + "sizeBytes": -1, + "type": "file", + }, + { + "fileType": "unknown", + "identifier": "", + "name": "raw-binary.txt", + "sizeBytes": -1, + "type": "file", + }, + { + "fileType": "unknown", + "identifier": "", + "name": "lemmy.png", + "sizeBytes": -1, + "type": "file", + }, + { + "fileType": "unknown", + "identifier": "", + "name": "also-lemmy.png", + "sizeBytes": -1, + "type": "file", + }, + { + "fileType": "unknown", + "identifier": "", + "name": "lemmy.png", + "sizeBytes": -1, + "type": "file", }, ] -EXPECTED_FILE_HANDLE_MESSAGES: list[DictObject] = [ +EXPECTED_RESOLVED_FILE_HANDLES: list[DictObject] = [ { - "content": [ - { - "fileType": "text/plain", - "identifier": "file-1", - "name": "raw-binary.txt", - "sizeBytes": 20, - "type": "file", - }, - { - "fileType": "text/plain", - "identifier": "file-1", - "name": "raw-binary.txt", - "sizeBytes": 20, - "type": "file", - }, - { - "fileType": "image", - "identifier": "file-2", - "name": "lemmy.png", - "sizeBytes": 41812, - "type": "file", - }, - { - "fileType": 
"image", - "identifier": "file-3", - "name": "also-lemmy.png", - "sizeBytes": 41812, - "type": "file", - }, - { - "fileType": "image", - "identifier": "file-2", - "name": "lemmy.png", - "sizeBytes": 41812, - "type": "file", - }, - ], - "role": "user", + "fileType": "text/plain", + "identifier": "file-1", + "name": "raw-binary.txt", + "sizeBytes": 20, + "type": "file", + }, + { + "fileType": "text/plain", + "identifier": "file-1", + "name": "raw-binary.txt", + "sizeBytes": 20, + "type": "file", + }, + { + "fileType": "image", + "identifier": "file-2", + "name": "lemmy.png", + "sizeBytes": 41812, + "type": "file", + }, + { + "fileType": "image", + "identifier": "file-3", + "name": "also-lemmy.png", + "sizeBytes": 41812, + "type": "file", + }, + { + "fileType": "image", + "identifier": "file-2", + "name": "lemmy.png", + "sizeBytes": 41812, + "type": "file", }, ] -def _add_file(file_data: _LocalFileData, identifier: str) -> _FileHandle: +def _add_file(file_data: _LocalFileData, identifier: str) -> FileHandle: name = file_data.name fetch_param = file_data._as_fetch_param() - return _FileHandle( + return FileHandle( name=name, identifier=identifier, size_bytes=len(fetch_param.content_base64), @@ -476,146 +468,121 @@ def _add_file(file_data: _LocalFileData, identifier: str) -> _FileHandle: ) -def _check_pending_file(file_handle_dict: DictObject, name: str) -> None: - assert file_handle_dict["type"] == "file" - assert file_handle_dict["name"] == name - assert file_handle_dict["identifier"] == "" - assert file_handle_dict["sizeBytes"] == -1 - assert file_handle_dict["fileType"] == "unknown" +def _check_pending_file(file_handle: FileHandle, name: str) -> None: + assert file_handle.type == "file" + assert file_handle.name == name + assert file_handle.identifier == "" + assert file_handle.size_bytes == -1 + assert file_handle.file_type == "unknown" def _check_fetched_text_file( - file_handle_dict: DictObject, name: str, identifier: str + file_handle: FileHandle, name: str, identifier: str ) -> None: - assert file_handle_dict["type"] == "file" - assert file_handle_dict["name"] == name - assert file_handle_dict["identifier"] == identifier - assert file_handle_dict["sizeBytes"] > 0 - assert file_handle_dict["fileType"] == "text/plain" + assert file_handle.type == "file" + assert file_handle.name == name + assert file_handle.identifier == identifier + assert file_handle.size_bytes > 0 + assert file_handle.file_type == "text/plain" -def _make_local_file_context() -> tuple[Chat, int]: +def _make_local_file_cache() -> tuple[_FileHandleCache, list[FileHandle], int]: # File context for fetching handles that ensures # * duplicate files are only looked up once # * files with different names are looked up under both names - chat = Chat() + cache = _FileHandleCache() num_unique_files = 3 - chat._add_file(b"raw binary data", "raw-binary.txt") - chat._add_file(b"raw binary data", "raw-binary.txt") - chat._add_file(IMAGE_FILEPATH) - chat._add_file(IMAGE_FILEPATH, "also-lemmy.png") - chat._add_file(IMAGE_FILEPATH) - with pytest.raises(RuntimeError, match="Pending file handles must be fetched"): - chat._get_history_for_prediction() - history = chat._get_history_unchecked() - assert history["messages"] == EXPECTED_LOCAL_FILE_MESSAGES - return chat, num_unique_files + files_to_cache: list[tuple[_FileCacheInputType, str | None]] = [ + (b"raw binary data", "raw-binary.txt"), + (b"raw binary data", "raw-binary.txt"), + (IMAGE_FILEPATH, None), + (IMAGE_FILEPATH, "also-lemmy.png"), + (IMAGE_FILEPATH, None), + ] + 
file_handles: list[FileHandle] = [] + for args in files_to_cache: + file_handles.append(cache._get_file_handle(*args)) + assert [h.to_dict() for h in file_handles] == EXPECTED_PENDING_FILE_HANDLES + return cache, file_handles, num_unique_files # TODO: Improve code sharing between this test case and its async counterpart # (potentially by moving the async version to `async/test_history_async.py`) -def test_implicit_file_handles() -> None: +def test_file_handle_cache() -> None: local_files: list[_LocalFileData] = [] - file_handles: list[_FileHandle] = [] + unique_file_handles: list[FileHandle] = [] - def add_file(file_data: _LocalFileData) -> _FileHandle: + def add_file(file_data: _LocalFileData) -> FileHandle: local_files.append(file_data) result = _add_file(file_data, f"file-{len(local_files)}") - file_handles.append(result) + unique_file_handles.append(result) return result - context, num_unique_files = _make_local_file_context() - context._fetch_file_handles(add_file) + cache, file_handles, num_unique_files = _make_local_file_cache() + cache._fetch_file_handles(add_file) assert len(local_files) == num_unique_files - assert len(file_handles) == num_unique_files - messages = context._get_history_for_prediction()["messages"] - assert messages == EXPECTED_FILE_HANDLE_MESSAGES - expected_num_message_parts = len(messages[0]["content"]) + assert len(unique_file_handles) == num_unique_files + assert [h.to_dict() for h in file_handles] == EXPECTED_RESOLVED_FILE_HANDLES # Adding the same file again should immediately populate the handle - expected_num_message_parts += 1 - context._add_file(IMAGE_FILEPATH) - messages = context._get_history_for_prediction()["messages"] - assert len(messages) == 1 - assert len(messages[0]["content"]) == expected_num_message_parts - assert messages[0]["content"][-1] == messages[0]["content"][-2] + image_handle = cache._get_file_handle(IMAGE_FILEPATH) + assert image_handle == file_handles[-1] # Fetching again should not perform any lookups - context._fetch_file_handles(add_file) + cache._fetch_file_handles(add_file) assert len(local_files) == num_unique_files - assert len(file_handles) == num_unique_files + assert len(unique_file_handles) == num_unique_files # Adding a different file should require a new lookup - expected_num_message_parts += 1 - context._add_file(__file__) - with pytest.raises(RuntimeError, match="Pending file handles must be fetched"): - context._get_history_for_prediction() - messages = context._get_history_unchecked()["messages"] - assert len(messages) == 1 - assert len(messages[0]["content"]) == expected_num_message_parts + this_file_handle = cache._get_file_handle(__file__) expected_name = f"{__name__.rpartition('.')[2]}.py" - added_file_handle = messages[-1]["content"][-1] - _check_pending_file(added_file_handle, expected_name) - context._fetch_file_handles(add_file) - messages = context._get_history_for_prediction()["messages"] + _check_pending_file(this_file_handle, expected_name) + cache._fetch_file_handles(add_file) assert len(local_files) == num_unique_files + 1 - assert len(file_handles) == num_unique_files + 1 - # While the pending file handle should be updated in place, - # retrieving the history takes a snapshot of the internal state - added_file_handle = messages[-1]["content"][-1] + assert len(unique_file_handles) == num_unique_files + 1 expected_identifier = f"file-{num_unique_files + 1}" - _check_fetched_text_file(added_file_handle, expected_name, expected_identifier) + _check_fetched_text_file(this_file_handle, expected_name, 
expected_identifier) @pytest.mark.asyncio -async def test_implicit_file_handles_async() -> None: +async def test_file_handle_cache_async() -> None: local_files: list[_LocalFileData] = [] - file_handles: list[_FileHandle] = [] + unique_file_handles: list[FileHandle] = [] - async def add_file(file_data: _LocalFileData) -> _FileHandle: + async def add_file(file_data: _LocalFileData) -> FileHandle: local_files.append(file_data) result = _add_file(file_data, f"file-{len(local_files)}") - file_handles.append(result) + unique_file_handles.append(result) return result - context, num_unique_files = _make_local_file_context() - await context._fetch_file_handles_async(add_file) + cache, file_handles, num_unique_files = _make_local_file_cache() + await cache._fetch_file_handles_async(add_file) assert len(local_files) == num_unique_files - assert len(file_handles) == num_unique_files - messages = context._get_history_for_prediction()["messages"] - assert messages == EXPECTED_FILE_HANDLE_MESSAGES - expected_num_message_parts = len(messages[0]["content"]) + assert len(unique_file_handles) == num_unique_files + assert [h.to_dict() for h in file_handles] == EXPECTED_RESOLVED_FILE_HANDLES # Adding the same file again should immediately populate the handle - expected_num_message_parts += 1 - context._add_file(IMAGE_FILEPATH) - messages = context._get_history_for_prediction()["messages"] - assert len(messages) == 1 - assert len(messages[0]["content"]) == expected_num_message_parts - assert messages[0]["content"][-1] == messages[0]["content"][-2] + image_handle = cache._get_file_handle(IMAGE_FILEPATH) + assert image_handle == file_handles[-1] # Fetching again should not perform any lookups - await context._fetch_file_handles_async(add_file) + await cache._fetch_file_handles_async(add_file) assert len(local_files) == num_unique_files - assert len(file_handles) == num_unique_files + assert len(unique_file_handles) == num_unique_files # Adding a different file should require a new lookup - expected_num_message_parts += 1 - context._add_file(__file__) - with pytest.raises(RuntimeError, match="Pending file handles must be fetched"): - context._get_history_for_prediction() - messages = context._get_history_unchecked()["messages"] - assert len(messages) == 1 - assert len(messages[0]["content"]) == expected_num_message_parts + this_file_handle = cache._get_file_handle(__file__) expected_name = f"{__name__.rpartition('.')[2]}.py" - added_file_handle = messages[-1]["content"][-1] - _check_pending_file(added_file_handle, expected_name) - await context._fetch_file_handles_async(add_file) - messages = context._get_history_for_prediction()["messages"] + _check_pending_file(this_file_handle, expected_name) + await cache._fetch_file_handles_async(add_file) assert len(local_files) == num_unique_files + 1 - assert len(file_handles) == num_unique_files + 1 - # While the pending file handle should be updated in place, - # retrieving the history takes a snapshot of the internal state - added_file_handle = messages[-1]["content"][-1] + assert len(unique_file_handles) == num_unique_files + 1 expected_identifier = f"file-{num_unique_files + 1}" - _check_fetched_text_file(added_file_handle, expected_name, expected_identifier) + _check_fetched_text_file(this_file_handle, expected_name, expected_identifier) + +def test_invalid_local_file() -> None: + cache = _FileHandleCache() + with pytest.raises(LMStudioOSError) as exc_info: + cache._get_file_handle("No such file") + check_sdk_error(exc_info, __file__) 
-EXPECTED_PENDING_ATTACHMENT_MESSAGES = [ + +EXPECTED_USER_ATTACHMENT_MESSAGES = [ { "content": [ { @@ -623,17 +590,17 @@ async def add_file(file_data: _LocalFileData) -> _FileHandle: "type": "text", }, { - "fileType": "unknown", - "identifier": "", + "fileType": "image", + "identifier": "some-image", "name": "lemmy.png", - "sizeBytes": -1, + "sizeBytes": 41812, "type": "file", }, { - "fileType": "unknown", - "identifier": "", - "name": "test_history.py", - "sizeBytes": -1, + "fileType": "text/plain", + "identifier": "some-file", + "name": "someFile.txt", + "sizeBytes": 100, "type": "file", }, ], @@ -641,14 +608,23 @@ async def add_file(file_data: _LocalFileData) -> _FileHandle: }, ] +INPUT_IMAGE_HANDLE = FileHandle( + name="lemmy.png", + identifier="some-image", + size_bytes=41812, + file_type="image", +) + def test_user_message_attachments() -> None: chat = Chat() chat.add_user_message( - "What do you make of this?", _images=[IMAGE_FILEPATH], _files=[__file__] + "What do you make of this?", + images=[INPUT_IMAGE_HANDLE], + _files=[INPUT_FILE_HANDLE], ) - history = chat._get_history_unchecked() - assert history["messages"] == EXPECTED_PENDING_ATTACHMENT_MESSAGES + history = chat._get_history() + assert history["messages"] == EXPECTED_USER_ATTACHMENT_MESSAGES def test_assistant_responses_cannot_be_multipart_or_consecutive() -> None: @@ -694,13 +670,6 @@ def test_initial_history_with_prompt_is_disallowed() -> None: Chat("Initial prompt", _initial_history=chat._history) -def test_invalid_local_file() -> None: - chat = Chat() - with pytest.raises(LMStudioOSError) as exc_info: - chat._add_file("No such file") - check_sdk_error(exc_info, __file__) - - EXPECTED_CHAT_STR = """\ Chat.from_history({ "messages": [ @@ -758,11 +727,3 @@ def test_chat_duplication(clone: Callable[[Chat], Chat]) -> None: for source_message, cloned_message in zip(chat_messages, cloned_messages): assert cloned_message is not source_message assert cloned_message == source_message - - -@pytest.mark.parametrize("clone", CLONING_MECHANISMS) -def test_cannot_clone_with_pending_files(clone: Callable[[Chat], Chat]) -> None: - chat = Chat("Initial system prompt") - chat._add_file(__file__) - with pytest.raises(LMStudioRuntimeError, match="Cannot copy chat history"): - clone(chat)
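
Usage sketch (illustrative only, not part of the patch): the updated sync tests above show the new workflow, where callers upload a file explicitly and pass the resulting FileHandle to add_user_message via the public images parameter instead of relying on the Chat instance to fetch pending handles. A minimal sketch assuming a running LM Studio server; the model identifier and image path below are placeholders, not values taken from this diff.

    from lmstudio import Chat, Client

    with Client() as client:
        # Upload the local file first; the server returns a resolved FileHandle.
        file_handle = client._files._add_temp_file("lemmy.png")
        history = Chat()
        # File handles are now passed directly via the `images` parameter,
        # rather than being queued for implicit fetching during prediction.
        history.add_user_message("What do you make of this?", images=[file_handle])
        vlm = client.llm.model("my-vlm-model")
        response = vlm.respond(history)
        print(response)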