Skip to content

Commit ee75de3

Browse files
authored
Merge pull request #22 from ramonaoptics/multi_threaded2
Optimize for multi-threaded workloads by releasing the GIL
2 parents 62c6a49 + 65dc3b1 commit ee75de3

File tree

5 files changed

+289
-79
lines changed

5 files changed

+289
-79
lines changed

.github/workflows/tests.yml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,15 @@ jobs:
2424
- os: "macos-latest"
2525
installer-url: "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-x86_64.sh"
2626
python-version: "3.13"
27+
- os: "macos-14"
28+
installer-url: "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh"
29+
python-version: "3.12"
30+
- os: "macos-14"
31+
installer-url: "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh"
32+
python-version: "3.13"
33+
- os: "macos-14"
34+
installer-url: "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh"
35+
python-version: "3.14"
2736
- os: "windows-latest"
2837
installer-url: "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Windows-x86_64.exe"
2938
python-version: "3.12"

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.5.0] - 2025-12-29
9+
10+
- Optimize image reading and writing for multi-threaded workloads by releasing the
11+
GIL for entire operations instead of per-line. All tight loops (component and line
12+
iterations) are now executed in C++ with the GIL released, significantly improving
13+
performance in multi-threaded scenarios. Single-threaded performance is also improved
14+
due to reduced Python overhead and better cache locality.
15+
- Remove temporary buffer allocations during image reading by writing directly to
16+
the output array with clipping and dtype conversion handled in C++.
17+
818
## [0.4.6] - 2025-12-29
919

1020
- Fix reading from memory files when the offset parameter is provided.

ojph/_imread.py

Lines changed: 1 addition & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import ctypes
32
from warnings import warn
43

54
from .ojph_bindings import J2CInfile, MemInfile, Codestream
@@ -302,44 +301,11 @@ def read_image(
302301
min_val = iinfo.min
303302
max_val = iinfo.max
304303

305-
if self._num_components == 1:
306-
# Single component - always HW format
307-
for h in range(height):
308-
self._codestream_pull(0, out=image[h], min_val=min_val, max_val=max_val)
309-
elif self._channel_order == 'CHW':
310-
# Non-RGB multi-component - use planar flag for format detection
311-
for c in range(self._num_components):
312-
for h in range(height):
313-
self._codestream_pull(c, out=image[c, h, :], min_val=min_val, max_val=max_val)
314-
else:
315-
# Non-planar mode was used for writing - return HWC format
316-
for c in range(self._num_components):
317-
for h in range(height):
318-
self._codestream_pull(c, out=image[h, :, c], min_val=min_val, max_val=max_val)
304+
self._codestream.pull_all_components(image, self._num_components, self._channel_order, min_val, max_val)
319305

320306
self._close_codestream_and_file()
321307
return image
322308

323-
def _codestream_pull(self, component, out, min_val=None, max_val=None):
324-
line = self._codestream.pull(component)
325-
i32_ptr = ctypes.cast(line.i32_address, ctypes.POINTER(ctypes.c_int32))
326-
line_array = np.ctypeslib.as_array(
327-
ctypes.cast(i32_ptr, ctypes.POINTER(ctypes.c_int32)),
328-
shape=(line.size,)
329-
)
330-
331-
# Assume min_val is not None if max_val is not None
332-
if max_val is not None:
333-
line_array = np.clip(
334-
line_array,
335-
min_val,
336-
max_val,
337-
out=out,
338-
casting='unsafe',
339-
)
340-
else:
341-
out[:] = line_array
342-
343309
def _close_codestream_and_file(self):
344310
if self._codestream is not None:
345311
self._codestream.close()

ojph/_imwrite.py

Lines changed: 10 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import ctypes
32
import inspect
43
from collections.abc import Buffer
54

@@ -39,8 +38,8 @@ def imwrite_to_memory(
3938
qstep=None,
4039
progression_order=None,
4140
tlm_marker=True,
42-
tileparts_at_resolutions=True,
43-
tileparts_at_components=False,
41+
tileparts_at_resolutions=None,
42+
tileparts_at_components=None,
4443
):
4544
mem_outfile = MemOutfile()
4645
mem_outfile.open(65536, False)
@@ -72,8 +71,8 @@ def imwrite(
7271
qstep=None,
7372
progression_order=None,
7473
tlm_marker=True,
75-
tileparts_at_resolutions=True,
76-
tileparts_at_components=False,
74+
tileparts_at_resolutions=None,
75+
tileparts_at_components=None,
7776
):
7877
# Auto-detect channel order if not provided
7978
if channel_order is None:
@@ -149,46 +148,16 @@ def imwrite(
149148
if not reversible and qstep is not None:
150149
codestream.access_qcd().set_irrev_quant(qstep)
151150
codestream.set_planar(num_components > 1)
152-
# Set tile parts for resolution, but not for channels
153-
codestream.set_tilepart_divisions(True, False)
151+
if tileparts_at_resolutions is None:
152+
tileparts_at_resolutions = progression_order == "RLCP"
153+
if tileparts_at_components is None:
154+
tileparts_at_components = False
155+
codestream.set_tilepart_divisions(tileparts_at_resolutions, tileparts_at_components)
154156
codestream.request_tlm_marker(tlm_marker)
155157

156158
codestream.write_headers(ojph_file, None, 0)
157159

158-
line = codestream.exchange(None, 0)
159-
if channel_order == "HW":
160-
# Single component - simple case
161-
for i in range(height):
162-
i32_ptr = ctypes.cast(line.i32_address, ctypes.POINTER(ctypes.c_uint32))
163-
line_array = np.ctypeslib.as_array(
164-
ctypes.cast(i32_ptr, ctypes.POINTER(ctypes.c_uint32)),
165-
shape=(line.size,)
166-
)
167-
line_array[...] = image[i, :]
168-
line = codestream.exchange(line, 0)
169-
elif channel_order == 'HWC':
170-
# Multi-component - use planar mode for efficiency
171-
# HWC format: image[height, width, channel]
172-
for c in range(num_components):
173-
for i in range(height):
174-
i32_ptr = ctypes.cast(line.i32_address, ctypes.POINTER(ctypes.c_uint32))
175-
line_array = np.ctypeslib.as_array(
176-
ctypes.cast(i32_ptr, ctypes.POINTER(ctypes.c_uint32)),
177-
shape=(line.size,)
178-
)
179-
line_array[...] = image[i, :, c]
180-
line = codestream.exchange(line, c)
181-
elif channel_order == 'CHW':
182-
# CHW format: image[channel, height, width]
183-
for c in range(num_components):
184-
for i in range(height):
185-
i32_ptr = ctypes.cast(line.i32_address, ctypes.POINTER(ctypes.c_uint32))
186-
line_array = np.ctypeslib.as_array(
187-
ctypes.cast(i32_ptr, ctypes.POINTER(ctypes.c_uint32)),
188-
shape=(line.size,)
189-
)
190-
line_array[...] = image[c, i, :]
191-
line = codestream.exchange(line, c)
160+
codestream.push_all_components(image, num_components, channel_order)
192161

193162
codestream.flush()
194163
if close_codestream:

0 commit comments

Comments
 (0)