Skip to content

Commit 26b2b31

Browse files
committed
feature: implement WSI optimizations and caching for improved performance
1 parent c9b8c22 commit 26b2b31

File tree

6 files changed

+446
-42
lines changed

6 files changed

+446
-42
lines changed

label_studio/core/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
22
"""
3+
4+
# Point Django at the WSI optimization AppConfig (nothing is imported here) - this will initialize WSI optimizations when Django starts
5+
default_app_config = 'core.wsi_optimization_init.WSIOptimizationConfig'

label_studio/core/deepzoom_util.py

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,87 @@
11
import os
2+
import io
23
from pathlib import Path
4+
from typing import Any, Callable, Optional, Tuple, Union
35

46
import openslide
57
import opensdpc
8+
from PIL import Image
69

710
from .annotated_deepzoom_generator import AnnotatedDeepZoomGenerator
11+
from .wsi_common import Transform, get_transform_for_color_profile
812

913
class DeepZoomWrapper:
    """Deep Zoom facade over a whole-slide image (WSI) file.

    The slide is opened with ``opensdpc`` when the file extension is
    ``.sdpc`` (case-insensitive) and with ``openslide`` otherwise. Tiling and
    DZI generation are delegated to ``AnnotatedDeepZoomGenerator``.
    """

    def __init__(self, full_path, tile_size: int = 254, overlap: int = 1, limit_bounds: bool = False):
        """Open the slide at ``full_path`` and build its Deep Zoom generator.

        Args:
            full_path: Location of the slide; converted with ``str()``.
            tile_size: Forwarded to ``AnnotatedDeepZoomGenerator``.
            overlap: Forwarded to ``AnnotatedDeepZoomGenerator``.
            limit_bounds: Forwarded to ``AnnotatedDeepZoomGenerator``.
        """
        slide_path = str(full_path)
        extension = os.path.splitext(slide_path)[1]

        self.path = slide_path
        self.filename = os.path.basename(slide_path)
        # True when the slide is handled by OpenSdpc rather than OpenSlide.
        self.is_sdpc = extension.lower() == ".sdpc"

        opener = opensdpc.OpenSdpc if self.is_sdpc else openslide.OpenSlide
        self._osr = opener(slide_path)

        self._dzg = AnnotatedDeepZoomGenerator(
            self._osr,
            full_path=Path(slide_path),
            tile_size=tile_size,
            overlap=overlap,
            limit_bounds=limit_bounds,
        )

        # Microns per pixel: mean of the X and Y values when both are
        # positive, otherwise 0. Only read for non-SDPC slides.
        self.mpp = 0
        if not self.is_sdpc:
            try:
                properties = self._osr.properties
                mpp_x = float(properties.get(openslide.PROPERTY_NAME_MPP_X, 0))
                mpp_y = float(properties.get(openslide.PROPERTY_NAME_MPP_Y, 0))
            except (KeyError, ValueError):
                # Missing or non-numeric MPP metadata: keep the 0 default.
                pass
            else:
                if mpp_x > 0 and mpp_y > 0:
                    self.mpp = (mpp_x + mpp_y) / 2

        # Default transform does nothing. Its return value is ignored by
        # get_tile, so a replacement has to modify the image it receives.
        self.transform: Transform = lambda img: None

    def get_tile(self, level: int, tile: tuple[int, int]) -> "Image.Image":
        """Return the tile at ``(level, tile)`` after calling ``self.transform`` on it."""
        image = self._dzg.get_tile(level, tile)

        # Instances without a ``transform`` attribute return the tile untouched.
        transform = getattr(self, 'transform', None) if hasattr(self, 'transform') else None
        if hasattr(self, 'transform'):
            transform(image)

        return image

    def get_dzi(self, format: str = "jpeg") -> str:
        """Return the generator's DZI descriptor for the given tile format."""
        return self._dzg.get_dzi(format)

    def get_tile_bytes(self, level: int, tile: tuple[int, int], format: str = "jpeg", quality: int = 75) -> bytes:
        """Return the tile at ``(level, tile)`` encoded as bytes.

        The tile is obtained through ``get_tile`` (so the transform has been
        applied) and saved with the requested ``format`` and ``quality``; the
        tile's ``icc_profile`` info entry, if any, is passed to the encoder.
        """
        image = self.get_tile(level, tile)
        embedded_profile = image.info.get('icc_profile')

        with io.BytesIO() as sink:
            image.save(sink, format, quality=quality, icc_profile=embedded_profile)
            return sink.getvalue()

    @property
    def level_dimensions(self):
        """Level dimensions as reported by the Deep Zoom generator."""
        return self._dzg.level_dimensions

label_studio/core/views.py

Lines changed: 144 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pathlib import Path
1010
from wsgiref.util import FileWrapper
1111
from io import BytesIO
12+
import time
1213

1314
import pandas as pd
1415
import requests
@@ -39,8 +40,18 @@
3940

4041
from .deepzoom_util import DeepZoomWrapper
4142

43+
# Module logger, named after this module.
logger = logging.getLogger(__name__)

# Separate logger for WSI operations, registered under the name 'server'.
# It is wired to a console StreamHandler only when no handler is attached yet.
# NOTE(review): indentation was lost in the source; the level and propagate
# settings are assumed to live inside this guard - confirm.
wsi_logger = logging.getLogger('server')
if not wsi_logger.handlers:
    wsi_handler = logging.StreamHandler()
    _wsi_format = logging.Formatter('[%(asctime)s] [WSI] %(levelname)s: %(message)s')
    wsi_handler.setFormatter(_wsi_format)
    wsi_logger.addHandler(wsi_handler)
    # INFO and above are emitted; records are not passed on to the root logger.
    wsi_logger.propagate = False
    wsi_logger.setLevel(logging.INFO)
54+
4455

4556
_PARAGRAPH_SAMPLE = None
4657

@@ -200,81 +211,183 @@ def heidi_tips(request):
200211
@api_view(['GET'])
@permission_classes([IsAuthenticated])
def localfiles_data(request):
    """Serve files for LocalFilesImportStorage, with cached WSI handling.

    Query parameters:
        d: File path relative to ``settings.LOCAL_FILES_DOCUMENT_ROOT``.
        level, col, row: Optional Deep Zoom tile coordinates. When all three
            are present for a WSI file, a single JPEG tile is returned;
            otherwise a WSI request returns the DZI XML descriptor.

    Returns:
        * 403 when local file serving is disabled, when ``d`` is missing, or
          when the tile coordinates are not integers.
        * 404 when the file does not exist, the user has no permission on a
          matching storage, or the slide/tile/DZI cannot be produced.
        * The tile (``image/jpeg``), the DZI (``application/xml``), or a
          ranged file response for non-WSI files.

    Timing information is written to ``wsi_logger`` throughout. Log calls use
    lazy ``%``-style arguments so disabled levels cost no string formatting.
    """
    api_start = time.time()

    wsi_logger.debug('Request: %s', request.GET.get('d', ''))

    user = request.user
    path = request.GET.get('d')
    level = request.GET.get('level')
    col = request.GET.get('col')
    row = request.GET.get('row')

    wsi_logger.info('Request started: path=%s, level=%s, col=%s, row=%s', path, level, col, row)

    if settings.LOCAL_FILES_SERVING_ENABLED is False:
        wsi_logger.debug('Request rejected: LOCAL_FILES_SERVING_ENABLED is False')
        return HttpResponseForbidden(
            "Serving local files can be dangerous, so it's disabled by default. "
            'You can enable it with LOCAL_FILES_SERVING_ENABLED environment variable, '
            'please check docs: https://labelstud.io/guide/storage.html#Local-storage'
        )

    path_start = time.time()

    local_serving_document_root = settings.LOCAL_FILES_DOCUMENT_ROOT
    if path and request.user.is_authenticated:
        path = posixpath.normpath(path).lstrip('/')
        full_path = Path(safe_join(local_serving_document_root, path))

        wsi_logger.debug('Path resolution: %.4fs, full_path=%s', time.time() - path_start, full_path)

        # Check if the file is a WSI file
        ext = os.path.splitext(full_path)[1].lower()
        is_wsi = ext in ['.svs', '.sdpc', '.tif', '.tiff', '.csp', '.kfb']

        wsi_logger.debug('File type check: extension=%s, is_wsi=%s', ext, is_wsi)

        db_start = time.time()

        # Try to find Local File Storage connection based prefix:
        # storage.path=/home/user, full_path=/home/user/a/b/c/1.jpg =>
        # full_path.startswith(path) => True
        localfiles_storage = LocalFilesImportStorage.objects.annotate(
            _full_path=Value(os.path.dirname(full_path), output_field=CharField())
        ).filter(_full_path__startswith=F('path'))

        wsi_logger.debug('Database query: %.4fs', time.time() - db_start)

        perm_start = time.time()

        # Check if user has permissions to access this storage
        user_has_permissions = False
        if localfiles_storage.exists():
            user_has_permissions = any(storage.project.has_permission(user) for storage in localfiles_storage)

        wsi_logger.debug(
            'Permission check: %.4fs, has_permission=%s', time.time() - perm_start, user_has_permissions
        )

        # Both failures answer 404 so the response does not reveal which one occurred.
        if not os.path.exists(full_path):
            wsi_logger.debug('File not found: %s', full_path)
            return HttpResponseNotFound()
        elif not user_has_permissions:
            wsi_logger.debug('Permission denied for user: %s', user.email)
            return HttpResponseNotFound()

        if is_wsi:
            wsi_start = time.time()

            # Import SLIDE_CACHE here to avoid circular imports
            from .wsi_optimizations import SLIDE_CACHE
            wsi_logger.debug('Import modules: %.4fs', time.time() - wsi_start)

            cache_start = time.time()
            try:
                # Record hit/miss BEFORE the lookup: once get() has run the
                # slide is expected to be cached, so checking afterwards
                # would report every request as a hit.
                # NOTE(review): this reads the cache's private _cache mapping.
                cache_hit = full_path in SLIDE_CACHE._cache

                # Get the slide (either from cache or load it)
                dz = SLIDE_CACHE.get(full_path)
                cache_time = time.time() - cache_start

                wsi_logger.info(
                    'Slide cache access: time=%.4fs, hit=%s, size=%s/%s',
                    cache_time,
                    cache_hit,
                    len(SLIDE_CACHE._cache),
                    SLIDE_CACHE.cache_size,
                )
            except Exception as e:
                # exception() keeps the traceback that error() would drop.
                wsi_logger.exception('Error loading file %s: %s', full_path, e)
                return HttpResponseNotFound()

            # Label used in the summary lines below; derived from the
            # pre-lookup state captured above.
            cache_state = 'hit' if cache_hit else 'miss'

            # Serve a specific tile if level, col, and row are provided
            if level and col and row:
                try:
                    level = int(level)
                    col = int(col)
                    row = int(row)
                    wsi_logger.debug('Parsing tile parameters: level=%s, col=%s, row=%s', level, col, row)
                except ValueError:
                    wsi_logger.error('Invalid tile parameters: level=%s, col=%s, row=%s', level, col, row)
                    return HttpResponseForbidden('Invalid level, col, or row parameter')

                try:
                    tile_start = time.time()

                    # Get the tile bytes directly (optimized for transfer)
                    # Use quality 75 for JPEG, matches server.py settings
                    tile_bytes = dz.get_tile_bytes(level, (col, row), format='jpeg', quality=75)

                    tile_time = time.time() - tile_start
                    total_time = time.time() - api_start
                    wsi_logger.info(
                        'Tile served: path=%s, level=%s, col=%s, row=%s, cache=%s, '
                        'tile_time=%.4fs, total_time=%.4fs',
                        full_path,
                        level,
                        col,
                        row,
                        cache_state,
                        tile_time,
                        total_time,
                    )

                    # Performance summary to make it easy to spot slow operations
                    wsi_logger.info(
                        'WSI PERFORMANCE SUMMARY - Tile: cache=%s, tile_extraction=%.4fs, total=%.4fs',
                        cache_state,
                        tile_time,
                        total_time,
                    )

                    return HttpResponse(tile_bytes, content_type='image/jpeg')
                except Exception as e:
                    wsi_logger.exception(
                        'Error getting tile for %s at level %s, col=%s, row=%s: %s',
                        full_path,
                        level,
                        col,
                        row,
                        e,
                    )
                    return HttpResponseNotFound()
            else:
                # Return DZI XML for the WSI file
                try:
                    dzi_start = time.time()

                    dzi_xml = dz.get_dzi(format='jpeg')

                    dzi_time = time.time() - dzi_start
                    total_time = time.time() - api_start

                    wsi_logger.info(
                        'DZI served: path=%s, cache=%s, dzi_time=%.4fs, total_time=%.4fs',
                        full_path,
                        cache_state,
                        dzi_time,
                        total_time,
                    )

                    # Performance summary
                    wsi_logger.info(
                        'WSI PERFORMANCE SUMMARY - DZI: cache=%s, generation=%.4fs, total=%.4fs',
                        cache_state,
                        dzi_time,
                        total_time,
                    )

                    return HttpResponse(dzi_xml, content_type='application/xml')
                except Exception as e:
                    wsi_logger.exception('Error getting DZI for %s: %s', full_path, e)
                    return HttpResponseNotFound()

        # If the file is not a WSI file, serve it as a regular file.
        # Permission and existence were already checked above; the re-check
        # is kept so a file removed in the meantime still yields a 404.
        if user_has_permissions and os.path.exists(full_path):
            file_start = time.time()

            content_type, encoding = mimetypes.guess_type(str(full_path))
            content_type = content_type or 'application/octet-stream'

            file_time = time.time() - file_start
            total_time = time.time() - api_start
            wsi_logger.info(
                'Regular file served: path=%s, content_type=%s, file_time=%.4fs, total_time=%.4fs',
                full_path,
                content_type,
                file_time,
                total_time,
            )

            # Performance summary for comparison
            wsi_logger.info(
                'WSI PERFORMANCE SUMMARY - Regular file: size=%s, file_time=%.4fs, total=%.4fs',
                os.path.getsize(full_path),
                file_time,
                total_time,
            )

            return RangedFileResponse(request, open(full_path, mode='rb'), content_type)
        else:
            return HttpResponseNotFound()

    # No path supplied (or unauthenticated user): reject.
    wsi_logger.info('Request rejected: total_time=%.4fs', time.time() - api_start)
    return HttpResponseForbidden()
279392

280393

0 commit comments

Comments
 (0)