Skip to content

Commit 39094e0

Browse files
committed
Update API documentation
1 parent 8f15a72 commit 39094e0

File tree

3 files changed

+55
-27
lines changed

3 files changed

+55
-27
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
playground/*
21
setup.cfg
32

43
# Byte-compiled / optimized / DLL files

CaptureFile/CaptureFile.py

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ class CaptureFile:
3636
3737
If the capture file does not already exist and it is opened for write, or if
3838
`force_new_empty_file` is True, then a new file will be created and the
39-
initial value for its metadata will be the passed `initial_metadata`.
40-
These are the only cases where the passed `initial_metadata` is used, and
41-
it is provided as a way of optionally ensuring that a capture file always
42-
has metadata even when it is first created.
39+
initial value for its metadata will be the passed `initial_metadata`. These
40+
are the only cases where the passed `initial_metadata` is used, and it is
41+
provided as a way of optionally ensuring that a capture file always has
42+
metadata even when it is first created.
4343
4444
The `encoding` argument is used to decode records that are returned. The
4545
default is `utf8`, which means the binary records stored in the capture file
@@ -48,9 +48,26 @@ class CaptureFile:
4848
encodings available at
4949
https://docs.python.org/3/library/codecs.html#standard-encodings are valid.
5050
51-
Only one process can open a capture file for writing at a time. Multiple
52-
processes can open the same capture file for read simultaneously with each
53-
other and with one process that opens it for write.
51+
To ensure only one process can open a capture file for writing at a time, set
52+
`use_os_file_locking` to True. Multiple processes can always open the same
53+
capture file for read simultaneously with each other and with one process
54+
that opens it for write.
55+
56+
Single-process but multi-threaded applications do not need
57+
`use_os_file_locking` to be True because the CaptureFile module will manage
58+
contention using in-memory locks. File locking in some Linux operating/file
59+
systems does not work well across servers and even sometimes on a single
60+
server, so be sure to verify any specific scenario that depends on file
61+
locking.
62+
63+
By default the CaptureFile is tuned for a commit size of approximately 32KB
64+
by having the default value of `compression_block_size` set to 32768. Any
65+
amount less than this is re-written every commit until the amount of data
66+
equals or exceeds this number at which point the data is compressed, written
67+
out and (mostly) never re-written again. If commits will typically contain
68+
substantially more than 32KB of data, setting `compression_block_size` to a
69+
larger number can improve the amount of compression obtained, resulting in a
70+
smaller CaptureFile.
5471
5572
An `InvalidCaptureFile` exception is raised if this constructor is used to
5673
open a file that is not a valid capture file, is in an unsupported version
@@ -81,6 +98,7 @@ class CaptureFile:
8198
force_new_empty_file: InitVar[bool] = False
8299
encoding: Optional[str] = "utf_8"
83100
use_os_file_locking: bool = False
101+
compression_block_size: InitVar[int] = 32768
84102

85103
_file_name: Path = field(init=False)
86104
"""A "Path" instance of file_name set during __post_init__"""
@@ -94,7 +112,7 @@ class CaptureFile:
94112
_compression_block: "BytesStream" = field(init=False)
95113

96114
_current_master_node: "MasterNode" = field(init=False)
97-
115+
98116
_new_is_in_progress: bool = field(init=False)
99117

100118
_record_count: int = field(init=False)
@@ -106,6 +124,7 @@ def __post_init__(
106124
self,
107125
initial_metadata: Optional[bytes],
108126
force_new_empty_file: bool,
127+
compression_block_size: int,
109128
):
110129
self._block_cache = lru_cache(maxsize=10)(self._block_cache_method)
111130
self._full_node_cache = lru_cache(maxsize=10)(self._full_node_cache_method)
@@ -114,7 +133,7 @@ def __post_init__(
114133

115134
if force_new_empty_file or (self.to_write and not self._file_name.is_file()):
116135
self._new_is_in_progress = True
117-
self._new_file(initial_metadata)
136+
self._new_file(initial_metadata, compression_block_size)
118137
self._new_is_in_progress = False
119138
self.open(self.to_write)
120139

@@ -209,12 +228,14 @@ def close(self):
209228
if CaptureFile._filenames_with_master_node_lock[
210229
self._file_name
211230
].drop_reference():
212-
del CaptureFile._filenames_with_master_node_lock[self._file_name]
231+
del CaptureFile._filenames_with_master_node_lock[
232+
self._file_name
233+
]
213234

214235
def __del__(self):
215236
self.close()
216237

217-
def _new_file(self, initial_metadata: Optional[bytes]):
238+
def _new_file(self, initial_metadata: Optional[bytes], compression_block_size: int):
218239
"""Creates a new capture file with name `file_name`.
219240
220241
If the file already exists, it is overwritten by the newly created file.
@@ -231,7 +252,9 @@ def _new_file(self, initial_metadata: Optional[bytes]):
231252
)
232253
CaptureFile._filenames_opened_for_write.add(self._file_name)
233254

234-
self._config = CaptureFileConfiguration()
255+
self._config = CaptureFileConfiguration(
256+
compression_block_size=compression_block_size
257+
)
235258
self._init_compression_block()
236259

237260
# First build the capture file as a temporary file so that we never have
@@ -595,7 +618,7 @@ def record_generator(
595618
starting_record_number - 1,
596619
rightmost_path,
597620
height,
598-
self._config.fan_out ** height,
621+
self._config.fan_out**height,
599622
)
600623

601624
def _record_generator(

docs/CaptureFile.CaptureFile.md

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ If the capture file does not already exist and it is opened for write, or if `fo
99

1010
The `encoding` argument is used to decode records that are returned. The default is `utf8`, which means the binary records stored in the capture file will be decoded into strings using the utf8 encoding before being returned. If `encoding=None` is set, then the raw bytes will be returned. All of the encodings available at https://docs.python.org/3/library/codecs.html#standard-encodings are valid.
1111

12-
Only one process can open a capture file for writing at a time. Multiple processes can open the same capture file for read simultaneously with each other and with one process that opens it for write.
12+
To ensure only one process can open a capture file for writing at a time, set `use_os_file_locking` to True. Multiple processes can always open the same capture file for read simultaneously with each other and with one process that opens it for write.
13+
14+
Single-process but multi-threaded applications do not need `use_os_file_locking` to be True because the CaptureFile module will manage contention using in-memory locks. File locking on some Linux operating systems and file systems does not work well across servers, and sometimes not even on a single server, so be sure to verify any specific scenario that depends on file locking.
15+
16+
By default the CaptureFile is tuned for a commit size of approximately 32KB by having the default value of `compression_block_size` set to 32768. Any amount less than this is re-written every commit until the amount of data equals or exceeds this number at which point the data is compressed, written out and (mostly) never re-written again. If commits will typically contain substantially more than 32KB of data, setting `compression_block_size` to a larger number can improve the amount of compression obtained, resulting in a smaller CaptureFile.
1317

1418
An `InvalidCaptureFile` exception is raised if this constructor is used to open a file that is not a valid capture file, is in an unsupported version of the capture file format, or is corrupted.
1519

@@ -20,10 +24,12 @@ An `InvalidCaptureFile` exception is raised if this constructor is used to open
2024
```python
2125
__init__(
2226
file_name: str,
23-
to_write: InitVar[bool] = False,
27+
to_write: bool = False,
2428
initial_metadata: InitVar[Optional[bytes]] = None,
2529
force_new_empty_file: InitVar[bool] = False,
26-
encoding: Optional[str] = 'utf_8'
30+
encoding: Optional[str] = 'utf_8',
31+
use_os_file_locking: bool = False,
32+
compression_block_size: InitVar[int] = 32768
2733
) → None
2834
```
2935

@@ -36,7 +42,7 @@ __init__(
3642

3743
---
3844

39-
<a href="..\CaptureFile\CaptureFile.py#L783"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
45+
<a href="..\CaptureFile\CaptureFile.py#L851"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
4046

4147
## <kbd>method</kbd> `add_record`
4248

@@ -54,7 +60,7 @@ If the capture file is open for read but not for write, then it will raise a `Ca
5460

5561
---
5662

57-
<a href="..\CaptureFile\CaptureFile.py#L167"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
63+
<a href="..\CaptureFile\CaptureFile.py#L211"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
5864

5965
## <kbd>method</kbd> `close`
6066

@@ -70,7 +76,7 @@ If this capture file is already closed, then this call does nothing.
7076

7177
---
7278

73-
<a href="..\CaptureFile\CaptureFile.py#L827"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
79+
<a href="..\CaptureFile\CaptureFile.py#L896"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
7480

7581
## <kbd>method</kbd> `commit`
7682

@@ -90,7 +96,7 @@ If it is not open for write then this method will raise a `CaptureFileNotOpenFor
9096

9197
---
9298

93-
<a href="..\CaptureFile\CaptureFile.py#L433"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
99+
<a href="..\CaptureFile\CaptureFile.py#L497"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
94100

95101
## <kbd>method</kbd> `get_metadata`
96102

@@ -106,7 +112,7 @@ If this capture file is not open, then this method will raise a `CaptureFileNotO
106112

107113
---
108114

109-
<a href="..\CaptureFile\CaptureFile.py#L112"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
115+
<a href="..\CaptureFile\CaptureFile.py#L147"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
110116

111117
## <kbd>method</kbd> `open`
112118

@@ -128,7 +134,7 @@ If any of these conditions are violated, then this method will raise a `Cap
128134

129135
---
130136

131-
<a href="..\CaptureFile\CaptureFile.py#L618"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
137+
<a href="..\CaptureFile\CaptureFile.py#L684"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
132138

133139
## <kbd>method</kbd> `record_at`
134140

@@ -148,7 +154,7 @@ If this capture file is not open, then this method will raise a `CaptureFileNotO
148154

149155
---
150156

151-
<a href="..\CaptureFile\CaptureFile.py#L673"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
157+
<a href="..\CaptureFile\CaptureFile.py#L741"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
152158

153159
## <kbd>method</kbd> `record_count`
154160

@@ -160,7 +166,7 @@ Returns the number of records available when the file was opened or last refresh
160166

161167
---
162168

163-
<a href="..\CaptureFile\CaptureFile.py#L516"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
169+
<a href="..\CaptureFile\CaptureFile.py#L581"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
164170

165171
## <kbd>method</kbd> `record_generator`
166172

@@ -180,7 +186,7 @@ If this capture file is not open, then this method will raise a `CaptureFileNotO
180186

181187
---
182188

183-
<a href="..\CaptureFile\CaptureFile.py#L307"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
189+
<a href="..\CaptureFile\CaptureFile.py#L363"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
184190

185191
## <kbd>method</kbd> `refresh`
186192

@@ -196,7 +202,7 @@ If this capture file is not open, then this method will raise a `CaptureFileNotO
196202

197203
---
198204

199-
<a href="..\CaptureFile\CaptureFile.py#L456"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
205+
<a href="..\CaptureFile\CaptureFile.py#L520"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
200206

201207
## <kbd>method</kbd> `set_metadata`
202208

0 commit comments

Comments
 (0)