@@ -10,29 +10,29 @@ Main header class for SCDL archives.
1010
1111```python
1212class SCDLHeader :
13- def __init__ (self , version = None , backend = Backend.MEMMAP_V0 ,
13+ def __init__ (self , version = None , backend = Backend.MEMMAP_V0 ,
1414 arrays = None , feature_indices = None )
15-
15+
1616 # Array management
1717 def add_array (self , array_info : ArrayInfo) -> None
1818 def get_array(self , name: str ) -> Optional[ArrayInfo]
1919 def remove_array(self , name: str ) -> bool
20-
21- # Feature index management
20+
21+ # Feature index management
2222 def add_feature_index(self , feature_index: FeatureIndexInfo) -> None
2323 def get_feature_index(self , name: str ) -> Optional[FeatureIndexInfo]
2424 def remove_feature_index(self , name: str ) -> bool
25-
25+
2626 # Serialization
2727 def serialize(self ) -> bytes
2828 @ classmethod
2929 def deserialize(cls , data: bytes ) -> ' SCDLHeader'
30-
30+
3131 # File I/O
3232 def save(self , file_path: str ) -> None
3333 @ classmethod
3434 def load(cls , file_path: str ) -> ' SCDLHeader'
35-
35+
3636 # Validation and utilities
3737 def validate(self ) -> None
3838 def calculate_total_size(self ) -> int
@@ -46,21 +46,21 @@ Information about arrays in the archive.
4646
4747```python
4848class ArrayInfo:
49- def __init__ (self , name : str , length : int , dtype : ArrayDType,
49+ def __init__ (self , name : str , length : int , dtype : ArrayDType,
5050 shape : Optional[Tuple[int , ... ]] = None )
51-
51+
5252 # Properties
5353 name: str # Array filename
5454 length: int # Number of elements
5555 dtype: ArrayDType # Data type
5656 shape: Optional[Tuple[int , ... ]] # Optional shape
57-
57+
5858 # Serialization
5959 def serialize (self , codec : BinaryHeaderCodec) -> bytes
6060 @ classmethod
61- def deserialize(cls , codec: BinaryHeaderCodec, data: bytes ,
61+ def deserialize(cls , codec: BinaryHeaderCodec, data: bytes ,
6262 offset: int = 0 ) -> Tuple[' ArrayInfo' , int ]
63-
63+
6464 # Utilities
6565 def calculate_size(self ) -> int
6666```
@@ -74,20 +74,20 @@ class FeatureIndexInfo:
7474 def __init__ (self , name : str , length : int , dtype : ArrayDType,
7575 index_files : Optional[List[str ]] = None ,
7676 shape : Optional[Tuple[int , ... ]] = None )
77-
77+
7878 # Properties
7979 name: str # Index name
8080 length: int # Number of entries
8181 dtype: ArrayDType # Data type
8282 index_files: List[str ] # Associated index files
8383 shape: Optional[Tuple[int , ... ]] # Optional shape
84-
84+
8585 # Serialization
8686 def serialize (self , codec : BinaryHeaderCodec) -> bytes
8787 @ classmethod
8888 def deserialize(cls , codec: BinaryHeaderCodec, data: bytes ,
8989 offset: int = 0 ) -> Tuple[' FeatureIndexInfo' , int ]
90-
90+
9191 # Utilities
9292 def calculate_size(self ) -> int
9393```
@@ -101,18 +101,18 @@ Data types for arrays.
101101```python
102102class ArrayDType(IntEnum):
103103 UINT8_ARRAY = 1 # 8-bit unsigned integers
104- UINT16_ARRAY = 2 # 16-bit unsigned integers
104+ UINT16_ARRAY = 2 # 16-bit unsigned integers
105105 UINT32_ARRAY = 3 # 32-bit unsigned integers
106106 UINT64_ARRAY = 4 # 64-bit unsigned integers
107107 FLOAT16_ARRAY = 5 # 16-bit floating point
108108 FLOAT32_ARRAY = 6 # 32-bit floating point
109109 FLOAT64_ARRAY = 7 # 64-bit floating point
110110 STRING_ARRAY = 8 # Variable-length strings
111111 FIXED_STRING_ARRAY = 9 # Fixed-length strings
112-
112+
113113 @ property
114114 def numpy_dtype_string(self ) -> str # Get NumPy dtype string
115-
115+
116116 @ classmethod
117117 def from_numpy_dtype(cls , dtype) -> ' ArrayDType' # Convert from NumPy dtype
118118```
@@ -131,12 +131,12 @@ class Backend(IntEnum):
131131### Header Operations
132132
133133```python
134- def create_header_from_arrays(array_files: List[str ],
134+ def create_header_from_arrays(array_files: List[str ],
135135 backend: Backend = Backend.MEMMAP_V0 ,
136136 version: Optional[SCDLVersion] = None ) -> SCDLHeader
137137 """ Create header by scanning array files."""
138138
139- def validate_header_compatibility(header1: SCDLHeader,
139+ def validate_header_compatibility(header1: SCDLHeader,
140140 header2: SCDLHeader) -> bool
141141 """ Check if two headers are compatible for merging."""
142142
@@ -149,10 +149,10 @@ def merge_headers(header1: SCDLHeader, header2: SCDLHeader) -> SCDLHeader
149149```python
150150class HeaderReader:
151151 def __init__ (self , file_path: str )
152-
152+
153153 def validate_magic(self ) -> bool # Quick magic number check
154154 def get_version(self ) -> SCDLVersion # Get version info
155- def get_backend(self ) -> Backend # Get backend info
155+ def get_backend(self ) -> Backend # Get backend info
156156 def get_array_count(self ) -> int # Get array count
157157 def get_full_header(self ) -> SCDLHeader # Get complete header
158158```
@@ -162,9 +162,9 @@ class HeaderReader:
162162```python
163163class SCDLVersion:
164164 major: int = 0
165- minor: int = 0
165+ minor: int = 0
166166 point: int = 0
167-
167+
168168 def __str__ (self ) -> str # "major.minor.point"
169169 def __eq__ (self , other) -> bool
170170 def __ne__ (self , other) -> bool
@@ -181,8 +181,8 @@ class CurrentSCDLVersion(SCDLVersion):
181181from bionemo.scdl.schema.magic import SCDL_MAGIC_NUMBER
182182from bionemo.scdl.schema.headerutil import Endianness
183183
184- SCDL_MAGIC_NUMBER: bytes = b'SCDL' # Archive magic number
185- Endianness.NETWORK # Network byte order (required)
184+ SCDL_MAGIC_NUMBER: bytes = b"SCDL" # Archive magic number
185+ Endianness.NETWORK # Network byte order (required)
186186```
187187
188188## Exceptions
@@ -245,7 +245,7 @@ reader = HeaderReader("large_header.bin")
245245if reader.validate_magic():
246246 print (f " Version: { reader.get_version()} " )
247247 print (f " Arrays: { reader.get_array_count()} " )
248-
248+
249249 # Only load full header when needed
250250 if reader.get_array_count() > 0 :
251251 full_header = reader.get_full_header()
@@ -258,10 +258,10 @@ import numpy as np
258258from bionemo.scdl.schema.header import ArrayDType
259259
260260# Convert various numpy dtypes to ArrayDType enums
261- array_dtype1 = ArrayDType.from_numpy_dtype(np.float32) # Type class
262- array_dtype2 = ArrayDType.from_numpy_dtype('float32') # String
263- array_dtype3 = ArrayDType.from_numpy_dtype(np.dtype('f4')) # Dtype object
261+ array_dtype1 = ArrayDType.from_numpy_dtype(np.float32) # Type class
262+ array_dtype2 = ArrayDType.from_numpy_dtype("float32") # String
263+ array_dtype3 = ArrayDType.from_numpy_dtype(np.dtype("f4")) # Dtype object
264264
265265# Use in ArrayInfo creation
266266array = ArrayInfo(" data.dat" , 1000 , array_dtype1)
267- ```
267+ ```
0 commit comments