-
Couldn't load subscription status.
- Fork 704
Introduce data schema to store raw tensors #6540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
f24016d
229527e
cec36f6
859cf52
c8d6489
d7b7162
9043914
c829c57
70c9f7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
| # pyre-strict | ||
|
|
||
| from dataclasses import dataclass | ||
| from typing import List | ||
|
|
||
| from executorch.exir.scalar_type import ScalarType | ||
|
|
||
| # Note: check executorch/schema/data.fbs for explanations of these fields. | ||
|
|
||
|
|
||
| # Describes one tensor stored in a data file; see executorch/schema/data.fbs. | ||
| @dataclass | ||
| class TensorMetadata: | ||
| # Unique id used to connect the data and the program. | ||
| fully_qualified_name: str | ||
| # Scalar type of the tensor's elements. | ||
| scalar_type: ScalarType | ||
| # Size of each dimension. | ||
| dim_sizes: List[int] | ||
| # Order in which the dimensions are laid out in memory, outer to inner. | ||
| # NOTE(review): the schema declares this as [ubyte]; List[bytes] looks like | ||
| # it should be List[int] -- confirm against the serializer before changing. | ||
| dim_order: List[bytes] | ||
| # Offset, in scalar_type elements, from the start of the tensor buffer to | ||
| # the actual data. The schema notes the runtime only supports zero. | ||
| storage_offset: int | ||
| # The schema marks this field as possibly unnecessary. | ||
| layout: int | ||
|
|
||
| # Location of the tensor data; the offset is relative to the TensorSegment | ||
| # that lists this tensor. | ||
| offset: int | ||
| # NOTE(review): units are not stated in the schema; presumably bytes. | ||
| size: int | ||
|
|
||
|
|
||
| # Lists the tensors stored in one data segment; see executorch/schema/data.fbs. | ||
| @dataclass | ||
| class TensorSegment: | ||
| # Index of the data segment that holds these tensors. | ||
| segment_index: int | ||
| # Per-tensor information, including each tensor's offset and size. | ||
| tensor_metadata: List[TensorMetadata] | ||
|
|
||
|
|
||
| # Offset and size of one data segment; see executorch/schema/data.fbs. | ||
| @dataclass | ||
| class DataSegment: | ||
| # Offset relative to the segment base offset given in the extended file | ||
| # header. | ||
| offset: int | ||
| # Size in bytes of the valid data starting at `offset`. | ||
| size: int | ||
|
|
||
|
|
||
| # Top-level container for a data file; see executorch/schema/data.fbs. | ||
| @dataclass | ||
| class Data: | ||
| # Schema version. | ||
| version: int | ||
| # Alignment for each tensor. | ||
| tensor_alignment: int | ||
| # Tensor information, grouped by the segment that stores the tensors. | ||
| tensor_segments: List[TensorSegment] | ||
| # NOTE(review): the schema names this field `segments`, not | ||
| # `data_segments`; confirm the names agree if serialization maps fields by | ||
| # name. | ||
| data_segments: List[DataSegment] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| include "scalar_type.fbs"; | ||
| namespace executorch_flatbuffer; | ||
|
|
||
| // Update the file identifier after any backward-compatibility (BC) breaking | ||
| // change to this schema. | ||
| file_identifier "DT01"; | ||
| file_extension "data"; | ||
|
|
||
| // Describes a single tensor: its identity, type, shape and where its data | ||
| // lives inside a segment. | ||
| table TensorMetadata { | ||
| // The unique id used to connect the data and the program. | ||
| fully_qualified_name:string; | ||
| // Scalar type of the tensor's elements. | ||
| scalar_type:ScalarType; | ||
|
|
||
| // Size of each dimension. | ||
| dim_sizes:[int]; | ||
|
||
|
|
||
| // Specifies in what order the dimensions are laid out in memory (from outer | ||
| // to inner). | ||
| // | ||
| // For example, given a rank 3 Tensor of size (3, 5, 2). If we name | ||
| // dimensions: [row, column, batch], then a dim_order of: | ||
| // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is | ||
| // the innermost dimension, then comes "row", and the outermost dimension is | ||
| // "batch". | ||
| // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is | ||
| // the innermost dimension, then comes "batch", and the outermost dimension | ||
| // is "row". | ||
| dim_order:[ubyte]; | ||
|
|
||
| // Offset in scalar_type elements (e.g., multiples of 4 bytes for an int | ||
| // scalar type) from the beginning of the tensor buffer to the beginning of | ||
| // the actual data. Currently, the runtime only supports a value of zero. | ||
| storage_offset:int; | ||
|
||
|
|
||
| // NOTE(review): the meaning of this value is not defined here, and the | ||
| // field may not be needed; confirm before the schema is finalized. | ||
| layout:byte; | ||
|
|
||
| // Tensor offsets are relative to the TensorSegment that lists the tensor. | ||
| // To locate a given tensor, combine: | ||
| // 1. segment_base_offset: from the file header. | ||
| // 2. segment offset: segments[tensor_segments[i].segment_index].offset | ||
| // This is likely to be 0 (all the tensors in one segment). | ||
| // 3. tensor offset: tensor_segments[i].tensor_metadata[j].offset | ||
| // May need to binary search over tensor_metadata to find the matching | ||
| // tensor by its fully_qualified_name. | ||
| offset: uint64; | ||
| // Size of the tensor data. NOTE(review): units presumably bytes; confirm. | ||
| size: uint64; | ||
| } | ||
|
|
||
| // Lists the tensors whose data is stored in one data segment. | ||
| table TensorSegment { | ||
| // Index of the segment in Data.segments. | ||
| segment_index: uint; | ||
|
|
||
| // Tensor information, including the offset and size of each tensor. | ||
| tensor_metadata:[TensorMetadata]; | ||
| } | ||
|
|
||
| // Offset and size of one segment of data. | ||
| table DataSegment { | ||
| // Segment offsets are relative to the segment base offset provided in | ||
| // the extended file header. Segments will typically be aligned in a | ||
| // way to make it possible to use mmap() to load them. | ||
| offset: uint64; | ||
|
|
||
| // The size in bytes of valid data starting at the offset. The segment | ||
| // data may be followed by padding before the segment that follows it, | ||
| // to make it easier to use mmap(). | ||
| size: uint64; | ||
| } | ||
|
|
||
| // Root table: schema version, tensor alignment, tensor metadata and the | ||
| // segments that hold the tensor data. | ||
| table Data { | ||
| // Schema version. | ||
| version:uint; | ||
|
|
||
| // Alignment for each tensor. NOTE(review): units presumably bytes; confirm. | ||
| tensor_alignment: uint32; | ||
|
|
||
| // Tensor information, grouped by the segment that stores the tensors. | ||
| tensor_segments:[TensorSegment]; | ||
|
|
||
| // Data segments, indexed by TensorSegment.segment_index. | ||
| segments:[DataSegment]; | ||
| } | ||
|
|
||
| // Data is the root table of a serialized file. | ||
| root_type Data; | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: name this field `sizes` rather than `dim_sizes`.