Skip to content

Commit 246c3ee

Browse files
committed
Improve performance by removing @dataclass and using __slots__
1 parent a5625eb commit 246c3ee

File tree

4 files changed

+136
-57
lines changed

4 files changed

+136
-57
lines changed

CaptureFile/CaptureFile.py

Lines changed: 135 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from contextlib import contextmanager
55
from copy import deepcopy
6-
from dataclasses import InitVar, dataclass, field
76
from functools import _lru_cache_wrapper, lru_cache
87
from io import BytesIO
98
from itertools import islice
@@ -29,7 +28,6 @@
2928
Record = Union[str, bytes]
3029

3130

32-
@dataclass
3331
class CaptureFile:
3432
"""The CaptureFile constructor opens and returns a capture file named
3533
`file_name` for reading or writing, depending on the value of `to_write`.
@@ -80,8 +78,7 @@ class CaptureFile:
8078
-1 is the default compromise which currently is equivalent to 6."""
8179

8280
_lock_start_position: ClassVar[int] = 0x7FFFFFFFFFFFFFFE
83-
_lock_end_position: ClassVar[int] = 0x7FFFFFFFFFFFFFFF
84-
_lock_size: ClassVar[int] = _lock_end_position - _lock_start_position
81+
_lock_size: ClassVar[int] = 1
8582

8683
_filenames_opened_for_write_sem: ClassVar[Semaphore] = Semaphore()
8784
_filenames_opened_for_write: ClassVar[Set[Path]] = set()
@@ -92,40 +89,65 @@ class CaptureFile:
9289
Dict[Path, "ReferenceCountedLock"]
9390
] = dict()
9491

92+
__slots__ = (
93+
"file_name",
94+
"to_write",
95+
"encoding",
96+
"use_os_file_locking",
97+
"_file_name",
98+
"_metadata",
99+
"_config",
100+
"_file",
101+
"_compression_block",
102+
"_current_master_node",
103+
"_new_is_in_progress",
104+
"_record_count",
105+
"_block_cache",
106+
"_full_node_cache",
107+
)
108+
95109
file_name: str
96-
to_write: bool = False
97-
initial_metadata: InitVar[Optional[bytes]] = None
98-
force_new_empty_file: InitVar[bool] = False
99-
encoding: Optional[str] = "utf_8"
100-
use_os_file_locking: bool = False
101-
compression_block_size: InitVar[int] = 32768
110+
to_write: bool
111+
encoding: Optional[str]
112+
use_os_file_locking: bool
102113

103-
_file_name: Path = field(init=False)
104-
"""A "Path" instance of file_name set during __post_init__"""
114+
_file_name: Path
115+
"""A "Path" instance of file_name set during __init__"""
105116

106-
_metadata: Optional[bytes] = field(init=False, default=None)
117+
_metadata: Optional[bytes]
107118

108-
_config: "CaptureFileConfiguration" = field(init=False)
119+
_config: "CaptureFileConfiguration"
109120

110-
_file: Optional[IO[bytes]] = field(init=False, default=None)
121+
_file: Optional[IO[bytes]]
111122

112-
_compression_block: "BytesStream" = field(init=False)
123+
_compression_block: "BytesStream"
113124

114-
_current_master_node: "MasterNode" = field(init=False)
125+
_current_master_node: "MasterNode"
115126

116-
_new_is_in_progress: bool = field(init=False)
127+
_new_is_in_progress: bool
117128

118-
_record_count: int = field(init=False)
129+
_record_count: int
119130

120-
_block_cache: _lru_cache_wrapper = field(init=False)
121-
_full_node_cache: _lru_cache_wrapper = field(init=False)
131+
_block_cache: _lru_cache_wrapper
132+
_full_node_cache: _lru_cache_wrapper
122133

123-
def __post_init__(
134+
def __init__(
124135
self,
125-
initial_metadata: Optional[bytes],
126-
force_new_empty_file: bool,
127-
compression_block_size: int,
136+
file_name: str,
137+
to_write: bool = False,
138+
initial_metadata: Optional[bytes] = None,
139+
force_new_empty_file: bool = False,
140+
encoding: Optional[str] = "utf_8",
141+
use_os_file_locking: bool = False,
142+
compression_block_size: int = 32768,
128143
):
144+
self.file_name = file_name
145+
self.to_write = to_write
146+
self.encoding = encoding
147+
self.use_os_file_locking = use_os_file_locking
148+
149+
self._metadata = None
150+
self._file = None
129151
self._block_cache = lru_cache(maxsize=10)(self._block_cache_method)
130152
self._full_node_cache = lru_cache(maxsize=10)(self._full_node_cache_method)
131153

@@ -135,6 +157,7 @@ def __post_init__(
135157
self._new_is_in_progress = True
136158
self._new_file(initial_metadata, compression_block_size)
137159
self._new_is_in_progress = False
160+
138161
self.open(self.to_write)
139162

140163
def __str__(self):
@@ -330,7 +353,6 @@ def _fetch_sized_data(self, start_position: int, /) -> bytes:
330353
return self._fetch_data(start_position + 4, size)
331354

332355
def _fetch_data(self, start_position: int, size: int, /) -> bytes:
333-
334356
assert self._file
335357
written_limit = (
336358
self._file_limit() // self._config.page_size * self._config.page_size
@@ -629,7 +651,6 @@ def _record_generator(
629651
power: int,
630652
/,
631653
) -> Generator[Record, None, None]:
632-
633654
rightmost_node = rightmost_path.rightmost_node(height)
634655
power = power // self._config.fan_out
635656

@@ -928,7 +949,6 @@ def commit(self, /):
928949
self._file.flush()
929950

930951

931-
@dataclass
932952
class CaptureFileConfiguration:
933953
"""The persistent configuration values of the capture file that are stored
934954
in the first bytes of the file.
@@ -939,46 +959,70 @@ class CaptureFileConfiguration:
939959
Default values are provided if a new instance of this class is created
940960
directly from its constructor"""
941961

942-
version: int = 2
962+
__slots__ = (
963+
"version",
964+
"page_size",
965+
"compression_block_size",
966+
"fan_out",
967+
"master_node_size",
968+
"master_node_positions",
969+
"compression_block_start",
970+
"initial_file_limit",
971+
"full_node_struct",
972+
)
973+
974+
version: int
943975
"""The version indicates the compatibility of code with file structure.
944976
945977
Code with a version higher than the one stored in file should be capable of
946978
reading and writing to the file but a file with a higher version number than
947979
what is in the code will not be usable."""
948980

949-
page_size: int = 4096
981+
page_size: int
950982
"""Pages of page_size bytes are used in various places as a minimum block of
951983
data. See DESIGN.md for how pages are used."""
952984

953-
compression_block_size: int = 32768
985+
compression_block_size: int
954986
"""Minimum number of bytes to compress and write out. While data is
955987
accumulating it is recorded in the master node but after this limit is
956988
exceeded it will be compressed and written out"""
957989

958-
fan_out: int = 32
990+
fan_out: int
959991
"""The maximum number of children in the index tree's nodes. For more
960992
information about the tree structure and usage see DESIGN.md"""
961993

962-
master_node_size: int = field(init=False)
994+
master_node_size: int
963995

964-
master_node_positions: Tuple[int] = field(init=False)
996+
master_node_positions: tuple[int, int]
965997
"""The two starting byte positions in the file of the two master nodes"""
966998

967-
compression_block_start: int = field(init=False)
968-
initial_file_limit: int = field(init=False)
969-
full_node_struct: Struct = field(init=False)
999+
compression_block_start: int
1000+
initial_file_limit: int
1001+
full_node_struct: Struct
9701002

9711003
current_version: ClassVar[int] = 2
9721004
"""The code's current version which can support any earlier version
9731005
recorded in the file"""
9741006

9751007
capture_file_type: ClassVar[bytes] = b"MioCapture\0"
1008+
"Length of capture_file_type is 11 which is where the 11 in >11s4L used in struct comes from"
9761009

977-
struct: ClassVar[Struct] = Struct(f">{len(capture_file_type)}s4L")
1010+
struct: ClassVar[Struct] = Struct(">11s4L")
9781011
"""Struct = String("MioCapture\0"), Long(version), Long(page_size),
9791012
Long(compression_block_size), Long(fan_out)"""
9801013

981-
def __post_init__(self, /):
1014+
def __init__(
1015+
self,
1016+
version: int = 2,
1017+
page_size: int = 4096,
1018+
compression_block_size: int = 32768,
1019+
fan_out: int = 32,
1020+
):
1021+
self.version = version
1022+
self.page_size = page_size
1023+
self.compression_block_size = compression_block_size
1024+
self.fan_out = fan_out
1025+
9821026
assert (
9831027
self.compression_block_size % self.page_size == 0
9841028
), "compression block size must be a multiple of page size"
@@ -991,10 +1035,10 @@ def __post_init__(self, /):
9911035
# compress_and_flush_if_full to know for certain no writing is happening
9921036
# on the first page after the file is created even across multiple OS
9931037
# process.
994-
self.master_node_positions = [
1038+
self.master_node_positions = (
9951039
self.page_size,
9961040
self.page_size + self.master_node_size,
997-
]
1041+
)
9981042
last_master_page_start = self.page_size - 4
9991043
last_master_page_end = last_master_page_start + self.page_size
10001044
self.compression_block_start = last_master_page_end
@@ -1049,7 +1093,6 @@ def write(self, file, /):
10491093
file.write(buffer)
10501094

10511095

1052-
@dataclass
10531096
class MasterNode:
10541097
"""
10551098
A MasterNode tracks where things are in the capture file.
@@ -1063,6 +1106,15 @@ class MasterNode:
10631106
struct: ClassVar[Struct] = Struct(f">LQL")
10641107
"""Struct = serial_number, file_limit, compression_block_len ">LQL" """
10651108

1109+
__slots__ = (
1110+
"serial_number",
1111+
"file_limit",
1112+
"metadata_pointer",
1113+
"rightmost_path",
1114+
"contents_of_last_page",
1115+
"compression_block_contents",
1116+
)
1117+
10661118
serial_number: int
10671119
"""MasterNode with largest serial_number is the active one
10681120
@@ -1093,6 +1145,22 @@ class MasterNode:
10931145
the file_limit once there is at least compression_block_size data
10941146
present"""
10951147

1148+
def __init__(
1149+
self,
1150+
serial_number: int,
1151+
file_limit: int,
1152+
metadata_pointer: "DataCoordinates",
1153+
rightmost_path: "RightmostPath",
1154+
contents_of_last_page: bytearray,
1155+
compression_block_contents: bytes,
1156+
):
1157+
self.serial_number = serial_number
1158+
self.file_limit = file_limit
1159+
self.metadata_pointer = metadata_pointer
1160+
self.rightmost_path = rightmost_path
1161+
self.contents_of_last_page = contents_of_last_page
1162+
self.compression_block_contents = compression_block_contents
1163+
10961164
@classmethod
10971165
def new_from(cls, master_node_buffer: bytes, page_size: int, /) -> "MasterNode":
10981166
(serial_number, file_limit, compression_block_len) = cls.struct.unpack_from(
@@ -1170,7 +1238,6 @@ def as_bytes(self, config: CaptureFileConfiguration, /) -> bytes:
11701238
return stream.getvalue()
11711239

11721240

1173-
@dataclass
11741241
class RightmostPath:
11751242
"""A list of RightmostNodes in height order (leaf -> root), one for each
11761243
level in the tree.
@@ -1181,12 +1248,12 @@ class RightmostPath:
11811248
number_of_children_struct: ClassVar[Struct] = Struct(">L")
11821249
"""Big-endian unsigned long ">L" """
11831250

1184-
rightmost_nodes: List["RightmostNode"] = field(default_factory=list, init=False)
1251+
__slots__ = "rightmost_nodes"
11851252

1186-
buffer: InitVar[Optional[bytes]] = None
1187-
offset: InitVar[int] = 0
1253+
rightmost_nodes: List["RightmostNode"]
11881254

1189-
def __post_init__(self, buffer: Optional[bytes], offset: int, /):
1255+
def __init__(self, buffer: Optional[bytes] = None, offset=0) -> None:
1256+
self.rightmost_nodes = []
11901257
if buffer is not None:
11911258
(
11921259
total_number_of_children,
@@ -1279,7 +1346,6 @@ def write_rightmost_nodes(self, stream: "BytesStream", /):
12791346
rightmost_node.write_with_height(stream, height)
12801347

12811348

1282-
@dataclass
12831349
class RightmostNode:
12841350
"""This is the rightmost node of a level in the tree index of all records
12851351
and is not referred to by any parent node.
@@ -1301,7 +1367,12 @@ class RightmostNode:
13011367
and ready to be filled again.
13021368
"""
13031369

1304-
children: List["DataCoordinates"] = field(default_factory=list, init=False)
1370+
__slots__ = "children"
1371+
1372+
children: List["DataCoordinates"]
1373+
1374+
def __init__(self) -> None:
1375+
self.children = []
13051376

13061377
def add_child(self, data_coordinate: "DataCoordinates", /):
13071378
self.children.append(data_coordinate)
@@ -1333,7 +1404,6 @@ def child_count(self, /) -> int:
13331404
return len(self.children)
13341405

13351406

1336-
@dataclass(frozen=True)
13371407
class DataCoordinates:
13381408
"""The two-dimensional coordinates of data within a capture file.
13391409
@@ -1352,6 +1422,8 @@ class DataCoordinates:
13521422
block_size_struct: ClassVar[Struct] = Struct(">L")
13531423
"""Big-endian unsigned long ">L" """
13541424

1425+
__slots__ = ("compressed_block_start", "data_start")
1426+
13551427
compressed_block_start: int
13561428
"""The start position of the compressed block in capture file"""
13571429

@@ -1360,7 +1432,7 @@ class DataCoordinates:
13601432
the compressed block"""
13611433

13621434
@classmethod
1363-
def from_bytes(cls, block: bytes, offset: int, /) -> "DataCoordinates":
1435+
def from_bytes(cls, block: bytes | memoryview, offset: int, /) -> "DataCoordinates":
13641436
return cls(*cls.struct.unpack_from(block, offset))
13651437

13661438
@classmethod
@@ -1384,6 +1456,10 @@ def from_bytes_with_height_prefix(
13841456
def null(cls, /) -> "DataCoordinates":
13851457
return cls(0, 0)
13861458

1459+
def __init__(self, compressed_block_start: int, data_start: int) -> None:
1460+
self.compressed_block_start = compressed_block_start
1461+
self.data_start = data_start
1462+
13871463
def write_data_coordinate(self, stream: "BytesStream", /):
13881464
stream.write(
13891465
DataCoordinates.struct.pack(self.compressed_block_start, self.data_start)
@@ -1447,10 +1523,15 @@ def zero_fill_to(self, end_position: int, /):
14471523
self.write(b"\0" * (end_position - self.tell()))
14481524

14491525

1450-
@dataclass
14511526
class ReferenceCountedLock:
1452-
_reference_count: int = 0
1453-
_lock: Lock = Lock()
1527+
1528+
__slots__ = ("_reference_count", "_lock" )
1529+
_reference_count: int
1530+
_lock: Lock
1531+
1532+
def __init__(self) -> None:
1533+
self._reference_count = 0
1534+
self._lock = Lock()
14541535

14551536
def add_reference(self) -> None:
14561537
self._reference_count += 1

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ API](https://github.com/MIOsoft/CaptureFile-Python/blob/master/docs/CaptureFile.
4040
The detailed description covers several useful APIs and parameters that are not
4141
covered in the Quickstart below.
4242

43-
To work with capture files visually, you can use the free [MIObdt](https://miosoft.com/miobdt/) application.
4443

4544
## Install
4645

0 commit comments

Comments
 (0)