Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions torchstore/storage_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
import torch
from monarch.actor import Actor, endpoint

from torchstore.transport.buffers import TransportBuffer

from torchstore.transport.buffers import (
create_default_transport_buffer,
TransportBuffer,
)
from torchstore.transport.pipe import Request, TensorSlice
from torchstore.utils import assemble_global_tensor, spawn_actors

Expand Down Expand Up @@ -59,10 +61,8 @@ async def put(
await self.store.put(key, transport_buffer, request)

@endpoint
async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
return await self.store.get(key, transport_buffer, request)
async def get(self, key: str, request: Request) -> TransportBuffer:
return await self.store.get(key, request)

@endpoint
async def get_meta(
Expand All @@ -86,9 +86,7 @@ async def put(
"""Store data in the storage backend."""
raise NotImplementedError()

async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
async def get(self, key: str, request: Request) -> TransportBuffer:
"""Retrieve data from the storage backend."""
raise NotImplementedError()

Expand Down Expand Up @@ -201,13 +199,13 @@ async def put(

self.kv[key] = tensor

async def get(
self, key: str, transport_buffer: TransportBuffer, request: Request
) -> TransportBuffer:
async def get(self, key: str, request: Request) -> TransportBuffer:

if key not in self.kv:
raise KeyError(f"Key '{key}' not found. {list(self.kv.keys())=}")

transport_buffer = create_default_transport_buffer()

# TODO: clean up
val = self.kv[key]
if isinstance(val, dict) and "obj" in val:
Expand All @@ -216,7 +214,7 @@ async def get(
return transport_buffer

if request.tensor_slice is None:
await transport_buffer.write_from(self.kv[key])
transport_buffer.from_contiguous_tensor(self.kv[key])
return transport_buffer

# TODO:
Expand All @@ -227,7 +225,7 @@ async def get(

for shard in self.kv[key].values():
if shard["slice"] == request.tensor_slice:
await transport_buffer.write_from(shard["tensor"])
transport_buffer.from_contiguous_tensor(shard["tensor"])
return transport_buffer

raise RuntimeError(f"Tensor slice {request.tensor_slice} not found in {key}")
Expand Down
31 changes: 26 additions & 5 deletions torchstore/transport/buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

import functools

import logging
import os
from typing import Any, Dict, List, Optional, Tuple, Union
Expand All @@ -30,13 +34,21 @@ def RDMABuffer(*args: Any, **kwargs: Any) -> Any:
# assert RDMA_CHUNK_SIZE_MB <= 1024, "Monarch does not support 1gb chunks via rdma"


@functools.cache
def rdma_available() -> bool:
rdma_enabled = (
os.environ.get("TORCHSTORE_RDMA_ENABLED", "0") == "1"
os.environ.get("TORCHSTORE_RDMA_ENABLED", "1") == "1"
) # TODO: enable on this build
return rdma_enabled and monarch_rdma_available()


def create_default_transport_buffer() -> TransportBuffer:
    """Build a new transport buffer of the default kind for this process.

    Returns:
        A fresh ``RDMATransportBuffer`` when ``rdma_available()`` reports
        true, otherwise a fresh ``MonarchTransportBuffer``.
    """
    # Pick the class first, then instantiate once; only the selected class
    # name is ever evaluated, exactly as in an if/else.
    buffer_cls = RDMATransportBuffer if rdma_available() else MonarchTransportBuffer
    return buffer_cls()


class TransportBuffer:
finalize: bool = False
is_object: bool = False
Expand All @@ -49,10 +61,7 @@ def update(self, other_buffer: "TransportBuffer") -> None:
self.objects = other_buffer.objects
self.requires_meta = other_buffer.requires_meta

def allocate(self, tensor_like: Union[torch.Tensor, Tuple]) -> None:
"""Allocates internal buffers based on either an existing tensor
or a Tuple of (shape, dtype)
"""
def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
raise NotImplementedError()

async def read_into(self, tensor: Optional[torch.Tensor]) -> torch.Tensor:
Expand Down Expand Up @@ -190,6 +199,15 @@ async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
for idx, chunk in enumerate(chunked_byte_view):
await self.rdma_buffers[idx].write_from(chunk)

def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
    """Expose ``tensor`` through RDMA buffers without an extra copy.

    Records the tensor's shape, dtype and dimensionality on the buffer, then
    wraps each byte-view chunk of the tensor in an ``RDMABuffer``.

    Args:
        tensor: The tensor to expose. Must be contiguous.

    Raises:
        AssertionError: If ``tensor`` is not contiguous.
    """
    assert tensor.is_contiguous(), "Tensor must be contiguous"

    # Tensor metadata travels with the buffer.
    self.shape = tensor.shape
    self.dtype = tensor.dtype
    self.dim = tensor.dim()

    # NOTE(review): assumes _create_byte_views_from_tensor returns views that
    # share storage with ``tensor`` -- confirm against its definition.
    byte_view_chunks = self._create_byte_views_from_tensor(tensor)

    # Register the byte views themselves. Wrapping them in torch.empty_like()
    # would hand the RDMA layer freshly allocated, uninitialised memory, and
    # callers of this method do not follow up with write_from(), so the
    # tensor's data would never be reachable through the buffers.
    # Holding the views in tensor_refs keeps the underlying storage alive.
    self.tensor_refs = list(byte_view_chunks)
    self.rdma_buffers = [RDMABuffer(chunk) for chunk in self.tensor_refs]


class MonarchTransportBuffer(TransportBuffer):
"""This interface is mostly a noop, intended to be used with Monarch's regular RPC.
Expand Down Expand Up @@ -224,3 +242,6 @@ async def write_from(self, tensor: Optional[torch.Tensor]) -> None:
def update(self, other_buffer: "TransportBuffer") -> None:
    """Adopt the state of ``other_buffer``, including its tensor.

    Delegates to the base-class ``update`` first, then takes over
    ``other_buffer.tensor`` by reference (no copy is made here).
    """
    super().update(other_buffer)
    self.tensor = other_buffer.tensor

def from_contiguous_tensor(self, tensor: torch.Tensor) -> None:
    """Attach ``tensor`` to this buffer by reference.

    No copy is made and contiguity is not checked in this implementation.
    """
    self.tensor = tensor
26 changes: 5 additions & 21 deletions torchstore/transport/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ def create_transport_buffer(self) -> TransportBuffer:
async def put_to_storage_volume(self, key, request: Request):
transport_buffer = self.create_transport_buffer()
tensor = request.tensor_val

transport_buffer.allocate(tensor)
await transport_buffer.write_from(tensor)
if not tensor.is_contiguous():
tensor = tensor.contiguous()
transport_buffer.from_contiguous_tensor(tensor)

# transporting tensors is handled by the buffer, so we don't want to send it
# via monarch RPC since that would generate considerable overhead
Expand All @@ -156,24 +156,8 @@ async def put_to_storage_volume(self, key, request: Request):
)

async def get_from_storage_volume(self, key, request: Request):

transport_buffer = self.create_transport_buffer()

# Certain buffers (RDMA) need to know the size of the tensor
# so we can allocate the right amount of memory locally.
# This can be avoided if the request contains a tensor slice.
# Could likely be optimized away in the future.
if transport_buffer.requires_meta and request.tensor_val is None:
meta = await self.storage_volume.get_meta.call_one(key, request.meta_only())
transport_buffer.allocate(meta)
else:
transport_buffer.allocate(request.tensor_val)

# TODO: consider placing the buffer inside the request or vice versa
transport_buffer.update(
await self.storage_volume.get.call_one(
key, transport_buffer, request.meta_only()
)
transport_buffer = await self.storage_volume.get.call_one(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're creating a race condition here -- memory is often created on the fly in storage volume to deal with non-contiguous tensors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In storage volume, all the tensors are already contiguous, and it's just handing out RDMABuffers pointing to those tensors.

key, request.meta_only()
)

if transport_buffer.is_object:
Expand Down
Loading