Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions torchstore/storage_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
import torch
from monarch.actor import Actor, endpoint

from torchstore.transport.buffers import TransportBuffer

from torchstore.transport.buffers import (
create_default_transport_buffer,
TransportBuffer,
)
from torchstore.transport.pipe import Request, TensorSlice
from torchstore.utils import assemble_global_tensor, spawn_actors

Expand Down Expand Up @@ -59,10 +61,8 @@ async def put(
await self.store.put(key, transport_buffer, request)

@endpoint
async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
return await self.store.get(key, transport_buffer, request)
async def get(self, key: str, request: Request) -> TransportBuffer:
return await self.store.get(key, request)

@endpoint
async def get_meta(
Expand All @@ -86,9 +86,7 @@ async def put(
"""Store data in the storage backend."""
raise NotImplementedError()

async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
async def get(self, key: str, request: Request) -> TransportBuffer:
"""Retrieve data from the storage backend."""
raise NotImplementedError()

Expand Down Expand Up @@ -202,13 +200,13 @@ async def put(

self.kv[key] = tensor

async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
async def get(self, key: str, request: Request) -> TransportBuffer:

if key not in self.kv:
raise KeyError(f"Key '{key}' not found. {list(self.kv.keys())=}")

transport_buffer = create_default_transport_buffer()

# TODO: clean up
val = self.kv[key]
if isinstance(val, dict) and "obj" in val:
Expand All @@ -217,7 +215,7 @@ async def get(
return transport_buffer

if request.tensor_slice is None:
await transport_buffer.write_from(self.kv[key])
transport_buffer.from_contiguous_tensor(self.kv[key])
return transport_buffer

# TODO:
Expand All @@ -228,7 +226,7 @@ async def get(

for shard in self.kv[key].values():
if shard["slice"] == request.tensor_slice:
await transport_buffer.write_from(shard["tensor"])
transport_buffer.from_contiguous_tensor(shard["tensor"])
return transport_buffer

raise RuntimeError(f"Tensor slice {request.tensor_slice} not found in {key}")
Expand Down
39 changes: 35 additions & 4 deletions torchstore/transport/buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

import functools

import logging
import os
from typing import Any, Dict, List, Optional, Tuple, Union
Expand All @@ -28,13 +32,21 @@ def RDMABuffer(*args: Any, **kwargs: Any) -> Any:
)


@functools.cache
def rdma_available() -> bool:
    """Report whether the RDMA transport should be used.

    RDMA is treated as enabled unless the ``TORCHSTORE_RDMA_ENABLED`` environment
    variable is set to something other than ``"1"`` (it defaults to ``"1"``). When it
    is enabled, the answer is whatever ``monarch_rdma_available()`` returns.

    The result is memoized by ``functools.cache``, so the environment variable is
    only consulted on the first call.
    """
    # TODO: enable on this build
    if os.environ.get("TORCHSTORE_RDMA_ENABLED", "1") != "1":
        # Explicitly switched off: skip the monarch probe entirely.
        return False
    return monarch_rdma_available()


def create_default_transport_buffer() -> TransportBuffer:
    """Build the transport buffer matching the current environment.

    Returns an ``RDMATransportBuffer`` when ``rdma_available()`` is truthy and a
    ``MonarchTransportBuffer`` otherwise.
    """
    buffer_cls = RDMATransportBuffer if rdma_available() else MonarchTransportBuffer
    return buffer_cls()


class TransportBuffer:
finalize: bool = False
is_object: bool = False
Expand All @@ -47,10 +59,7 @@ def update(self, other_buffer: "TransportBuffer") -> None:
self.objects = other_buffer.objects
self.requires_meta = other_buffer.requires_meta

def allocate(self, tensor_like: Union[torch.Tensor, Tuple]) -> None:
"""Allocates internal buffers based on either an existing tensor
or a Tuple of (shape, dtype)
"""
def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
"""Populate this buffer from an existing tensor.

The base class does not implement this and always raises NotImplementedError;
the concrete transport buffers provide their own versions.
"""
raise NotImplementedError()

async def read_into(self, tensor: Optional[torch.Tensor]) -> torch.Tensor:
Expand All @@ -59,6 +68,9 @@ async def read_into(self, tensor: Optional[torch.Tensor]) -> torch.Tensor:
async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
"""Write ``tensor`` into this buffer.

The base class does not implement this and always raises NotImplementedError;
subclasses supply the transport-specific behaviour.
"""
raise NotImplementedError()

async def drop(self) -> None:
"""Release any resources held by this buffer.

The base implementation does nothing; subclasses that hold resources override it.
"""
pass


class RDMATransportBuffer(TransportBuffer):
# TODO: when we try this with rdma, I should be able to write rdma directly to the tensor
Expand Down Expand Up @@ -173,6 +185,12 @@ async def read_into(self, tensor: Optional[torch.Tensor] = None) -> torch.Tensor

return tensor

async def drop(self) -> None:
"""Drop every RDMA buffer held by this transport buffer and clear ``tensor_refs``."""
if self.rdma_buffers is not None:
# Buffers are dropped one at a time, each awaited before the next.
for buffer in self.rdma_buffers:
await buffer.drop()
# NOTE(review): rdma_buffers itself is left in place, so a second drop() would call
# buffer.drop() on the same objects again -- confirm that is safe.
self.tensor_refs = None

# recv
async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
if tensor is None:
Expand All @@ -195,6 +213,16 @@ async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
for idx, chunk in enumerate(chunked_byte_view):
await self.rdma_buffers[idx].write_from(chunk)

def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
    """Expose an existing contiguous tensor through RDMA buffers.

    Records the tensor's shape, dtype and number of dimensions on this buffer, then
    wraps each byte view produced by ``_create_byte_views_from_tensor`` in an
    ``RDMABuffer``.

    The caller must keep ``tensor`` alive for as long as this buffer is in use.

    Raises:
        AssertionError: if ``tensor`` is not contiguous.
    """
    assert tensor.is_contiguous(), "Tensor must be contiguous"

    # Metadata recorded alongside the buffers.
    self.shape = tensor.shape
    self.dtype = tensor.dtype
    self.dim = tensor.dim()

    # One RDMABuffer per byte view of the tensor.
    wrapped = []
    for byte_view in self._create_byte_views_from_tensor(tensor):
        wrapped.append(RDMABuffer(byte_view))
    self.rdma_buffers = wrapped


class MonarchTransportBuffer(TransportBuffer):
"""This interface is mostly a noop, intended to be used with Monarch's regular RPC.
Expand Down Expand Up @@ -229,3 +257,6 @@ async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
def update(self, other_buffer: "TransportBuffer") -> None:
"""Copy state from ``other_buffer`` into this buffer.

Runs the base-class ``update`` first, then also takes over ``other_buffer.tensor``.
"""
super().update(other_buffer)
self.tensor = other_buffer.tensor

def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
"""Store ``tensor`` on this buffer as ``self.tensor``.

The tensor is assigned as-is; no contiguity check or other processing is performed here.
"""
self.tensor = tensor
35 changes: 13 additions & 22 deletions torchstore/transport/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,38 +145,29 @@ def create_transport_buffer(self) -> TransportBuffer:
async def put_to_storage_volume(self, key, request: Request):
transport_buffer = self.create_transport_buffer()
tensor = request.tensor_val

transport_buffer.allocate(tensor)
await transport_buffer.write_from(tensor)
if tensor is not None:
# TODO: investigate why RDMA fails on CUDA tensors
tensor = tensor.cpu()
if not tensor.is_contiguous():
tensor = tensor.contiguous()
transport_buffer.from_contiguous_tensor(tensor)

# transporting tensors is handled by the buffer, so we don't want to send it
# via monarch RPC since that would generate considerable overhead
await self.storage_volume.put.call_one(
key, transport_buffer, request.meta_only()
)

async def get_from_storage_volume(self, key, request: Request):

transport_buffer = self.create_transport_buffer()

# Certain buffers (RDMA) need to know the size of the tensor
# so we can allocate the right amount of memory locally.
# This can be avoided if the request contains a tensor slice.
# Could likely be optimized away in the future.
if transport_buffer.requires_meta and request.tensor_val is None:
meta = await self.storage_volume.get_meta.call_one(key, request.meta_only())
transport_buffer.allocate(meta)
else:
transport_buffer.allocate(request.tensor_val)
await transport_buffer.drop()

# TODO: consider placing the buffer inside the request or vice versa
transport_buffer.update(
await self.storage_volume.get.call_one(
key, transport_buffer, request.meta_only()
)
async def get_from_storage_volume(self, key, request: Request):
transport_buffer = await self.storage_volume.get.call_one(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're creating a race condition here -- memory is often created on the fly in storage volume to deal with non-contiguous tensors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In storage volume, all the tensors are already contiguous, and it's just handing out RDMABuffers pointing to those tensors.

key, request.meta_only()
)

if transport_buffer.is_object:
return transport_buffer.objects

return await transport_buffer.read_into(request.tensor_val)
ret = await transport_buffer.read_into(request.tensor_val)
await transport_buffer.drop()
return ret