@@ -334,7 +334,83 @@ import pydicom
dcm = pydicom.dcmread("...")  # Any method to read from file/cloud storage

# Offset tables only exist for encapsulated (compressed) pixel data, so check
# the transfer syntax before inspecting anything else.
if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
    print(
        "This image does not use an encapsulated (compressed) transfer "
        "syntax, so offset tables are not required."
    )
else:
    # Check metadata for the extended offset table
    print("Has Extended Offset Table:", "ExtendedOffsetTable" in dcm)

    # The start of the PixelData element will be a 4 byte item tag for the
    # offset table, which should always be present. The following 4 bytes give
    # the length of the offset table. If it is non-zero, the offset table is
    # present.
    has_basic_offset_table = dcm.PixelData[4:8] != b"\x00\x00\x00\x00"
    print("Has Basic Offset Table:", has_basic_offset_table)
351+
352+ ```
353+
To perform the same check on a remote Google Cloud Storage blob without downloading all of the pixel data, read only the metadata and the first few bytes of the pixel data element, for example:
355+
356+ ``` python
357+ import os
358+ from pydicom import dcmread
359+ from google.cloud import storage
360+
361+
def _read_exact(reader, size: int, what: str) -> bytes:
    """Read exactly ``size`` bytes from ``reader``, or raise ``ValueError`` naming ``what``."""
    data = reader.read(size)
    if len(data) != size:
        raise ValueError(f"Unexpected end of file while reading {what}")
    return data


def check_offset_table(blob_key: str) -> None:
    """Print information on the offset table in an IDC blob.

    Only the metadata and the first few bytes of the pixel data element are
    read from the remote blob; the pixel data itself is not downloaded in full.

    Args:
        blob_key: Name of the blob within the ``idc-open-data`` bucket.

    Raises:
        ValueError: If the bytes following the metadata are not the expected
            pixel data tag and offset table item tag, or if the file ends
            before they can be read.
    """
    # Create a storage client and use it to access the IDC's public data package
    gcs_client = storage.Client.create_anonymous_client()

    # Blob object for the particular file you want to check
    blob = gcs_client.bucket("idc-open-data").blob(blob_key)

    # Open the blob object for remote reading with a ~500kB chunk size
    with blob.open(mode="rb", chunk_size=500_000) as reader:
        # Read the file with stop_before_pixels=True, this moves the cursor
        # position to the start of the pixel data attribute
        dcm = dcmread(reader, stop_before_pixels=True)

        if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
            print(
                "This image does not use an encapsulated (compressed) transfer "
                "syntax, so offset tables are not required."
            )
            return

        # The presence of the extended offset table in the loaded metadata can
        # be checked straightforwardly
        has_extended_offset_table = "ExtendedOffsetTable" in dcm
        print("Has Extended Offset Table:", has_extended_offset_table)

        # Read the next tag, should be the pixel data tag (7FE0,0010) in
        # little endian byte order. Raise explicitly rather than assert so
        # the check is not stripped when Python runs with -O.
        tag = _read_exact(reader, 4, "pixel data tag")
        if tag != b"\xe0\x7f\x10\x00":
            raise ValueError("Expected pixel data tag")

        # Skip over VR (2 bytes), reserved (2 bytes), and pixel data length (4
        # bytes), giving 8 bytes total. Refer to
        # https://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_A.4.html#table_A.4-2
        reader.seek(8, os.SEEK_CUR)

        # Read the item tag (FFFE,E000) for the offset table item
        item_tag = _read_exact(reader, 4, "offset table item tag")
        if item_tag != b"\xfe\xff\x00\xe0":
            raise ValueError("Expected item tag")

        # Read the 32bit length of the pixel data's basic offset table. A
        # short read must not be mistaken for a non-zero length.
        length = _read_exact(reader, 4, "offset table length")

        # If the length of the offset table is non-zero, the offset table exists
        has_basic_offset_table = length != b"\x00\x00\x00\x00"
        print("Has Basic Offset Table:", has_basic_offset_table)
406+
407+
# Example with no offset table (NLST-LSS collection)
check_offset_table("4a30ffd2-8489-427b-9a83-03f4cf28534d/ad46e1e3-b37c-434b-a67a-5bacbcc608d9.dcm")

# Example with basic offset table (CCDI-MCI collection)
check_offset_table("763fe058-7d25-4ba7-9b29-fd3d6c41dc4b/210f0529-c767-4795-9acf-bad2f4877427.dcm")

# Example with extended offset table (CMB-MML collection)
check_offset_table("79f38b50-4df4-4358-9271-f28aeac573d7/23b9272a-34ef-49ca-833f-84329a18c1e4.dcm")
340416```
0 commit comments