Skip to content
38 changes: 38 additions & 0 deletions exir/schema_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


from dataclasses import dataclass
from typing import List

from executorch.exir.scalar_type import ScalarType

# Note: check executorch/schema/data.fbs for explanations of these fields.


@dataclass
class TensorMetadata:
    """Metadata describing one tensor stored in a data file.

    Mirrors the ``TensorMetadata`` table in executorch/schema/data.fbs; the
    fields are declared in the same order as in that table.
    """

    # Unique id used to connect the data and the program.
    fully_qualified_name: str
    scalar_type: ScalarType
    # Size of each dimension.
    # NOTE(review): a reviewer suggested naming this `sizes` rather than
    # `dim_sizes`; not renamed here because callers may rely on the name.
    dim_sizes: List[int]
    # Order in which the dimensions are laid out in memory, outer to inner.
    dim_order: List[bytes]

    # Index of the segment that the tensor data is stored in.
    segment_index: int
    # Offset of the tensor data, relative to the segment it is stored in.
    offset: int


@dataclass
class DataSegment:
    """Location of one data segment within a data file.

    Mirrors the ``DataSegment`` table in executorch/schema/data.fbs.
    """

    # Offset of the segment, relative to the segment base offset provided in
    # the extended file header.
    offset: int
    # Size in bytes of the valid data starting at `offset`; padding may follow
    # before the next segment.
    size: int


@dataclass
class Data:
    """Top-level description of a data file.

    Mirrors the ``DataFile`` root table in executorch/schema/data.fbs.
    """

    # Schema version.
    version: int
    # Alignment, in bytes, of each tensor; TensorMetadata.offset values are
    # aligned to this.
    tensor_alignment: int
    # Metadata for each tensor stored in the file.
    tensor_segments: List[TensorMetadata]
    # The data segments that tensors refer to via TensorMetadata.segment_index.
    # NOTE(review): the corresponding field in data.fbs is named `segments`,
    # not `data_segments` -- confirm the serializer maps between the two names.
    data_segments: List[DataSegment]
69 changes: 69 additions & 0 deletions schema/data.fbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// ScalarType, used by TensorMetadata in this schema, is presumably declared
// in the included scalar_type.fbs.
include "scalar_type.fbs";
namespace executorch_flatbuffer;

// File identifier embedded in serialized buffers.
// Update after backwards-compatibility (BC) breaking changes.
file_identifier "DT01";
// File extension used for data files of this schema.
file_extension "ptd";

// Describes one tensor stored in the data file: its identity, type, shape,
// memory layout, and where its data lives.
table TensorMetadata {
// The unique id used to connect the data and program.
fully_qualified_name: string;
scalar_type: ScalarType;

// Size of each dimension.
dim_sizes: [int32];

// Specifies in what order the dimensions are laid out in memory (from outer
// to inner).
//
// For example, given a rank 3 Tensor of size (3, 5, 2). If we name
// dimensions: [row, column, batch], then a dim_order of:
// - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
//   the innermost dimension, then comes "row", and the outermost dimension is
//   "batch".
// - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
//   the innermost dimension, then comes "batch", and the outermost dimension
//   is "row".
dim_order: [uint8];

// Index into DataFile.segments of the segment that the tensor data is
// stored in.
segment_index: uint32;

// Tensor offsets are relative to the segment that holds the tensor.
// To retrieve a given tensor:
// 1. segment_base_offset: from the file header.
// 2. segment offset: segments[segment_index].offset
//    This is likely to be 0 (all the tensors in one segment).
// 3. tensor offset: tensor_segments[i].offset
//    Find the relevant index i by matching on fully_qualified_name.
offset: uint64;
}

// Location (offset and size) of one data segment in the file.
table DataSegment {
// Segment offsets are relative to the segment base offset provided in
// the extended file header. Segments will typically be aligned in a
// way to make it possible to use mmap() to load them.
offset: uint64;

// The size in bytes of valid data starting at the offset. The segment
// data may be followed by padding before the segment that follows it,
// to make it easier to use mmap().
size: uint64;
}

// Root table of the schema (declared via root_type): schema version, tensor
// alignment, per-tensor metadata, and the list of data segments.
table DataFile {
// Schema version.
version: uint32;

// Alignment for each tensor in bytes. Offsets of the tensor provided
// in TensorMetadata.offset are aligned to tensor_alignment.
tensor_alignment: uint32;

// Metadata for each tensor in the file. Each entry names the segment
// (segment_index) and the offset at which the tensor data is stored.
tensor_segments: [TensorMetadata];

// Data segments, indexed by TensorMetadata.segment_index.
segments: [DataSegment];
}

// DataFile is the root table of a serialized buffer.
root_type DataFile;
Loading