Skip to content

Commit 0609cf2

Browse files
authored
bug: Fix field removal and blanking to clean up child UID references (#293)
* bug: Fix field removal and blanking to clean up child UID references - Ensure child field UIDs are removed from internal lookup when parent sequence is deleted or blanked - Update tests in test_action_interaction.py to show this behavior for blanked and removed sequences - Prevent stale references and errors in subsequent recipe actions Before the fix: - If you removed or blanked a parent sequence tag, the internal field lookup (self.fields) still contained UIDs for child tags. - Subsequent recipe actions targeting child tags could cause errors (KeyError, IndexError) or operate on stale references. After the fix: - When you remove or blank a parent sequence tag, all child UIDs nested under that parent are also removed from self.fields. - Subsequent actions do not find stale child references, preventing errors and ensuring consistency. * feat: add get_child_fields to list child fields UIDs, bump version and changelog
1 parent 9406332 commit 0609cf2

File tree

4 files changed

+103
-3
lines changed

4 files changed

+103
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
1414
Referenced versions in headers are tagged on Github, in parentheses are for pypi.
1515

1616
## [vxx](https://github.com/pydicom/deid/tree/master) (master)
17+
- Fix field removal and blanking to clean up child UID references [#293](https://github.com/pydicom/deid/pull/293) (0.4.9)
1718
- Fix UID lookup for nested sequence fields in DICOM datasets [#292](https://github.com/pydicom/deid/pull/292) (0.4.8)
1819
- Allow saving with a compressed transfer syntax [#290](https://github.com/pydicom/deid/pull/290) (0.4.7)
1920
- Improve performance of header deid with caching and lookup tables [#289](https://github.com/pydicom/deid/pull/289)

deid/dicom/parser.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,27 +176,50 @@ def get_nested_field(self, field, return_parent=False):
176176
return parent, desired
177177
return desired
178178

179+
def get_child_fields(self, field):
180+
"""
181+
Return a list of child field UIDs for a given field.
182+
183+
This method identifies all field UIDs in self.fields that are nested under the provided field,
184+
based on the UID prefix convention (parent UID + '__').
185+
It is used to find and remove all child fields when blanking or deleting a parent field (e.g., a sequence).
186+
"""
187+
field_uid_prefix = field.uid + "__"
188+
return [uid for uid in self.fields if uid.startswith(field_uid_prefix)]
189+
179190
def delete_field(self, field):
180191
"""
181192
Delete a field from the dicom.
182193
183194
We do this by way of parsing all nested levels of a tag into actual tags,
184-
and deleting the child node.
195+
and deleting the child node. If the field being deleted has nested children
196+
(e.g., a sequence), also remove all child field UIDs from the internal lookup.
185197
"""
186198
# Returns the parent, and a DataElement (indexes into parent by tag)
187199
parent, desired = self.get_nested_field(field, return_parent=True)
188200
if parent and desired in parent:
189201
del parent[desired]
202+
# Remove the field itself from the lookup
190203
self.fields.remove(field.uid)
204+
# Also remove any child fields that were nested under this field
205+
for child_uid in self.get_child_fields(field):
206+
self.fields.remove(child_uid)
191207

192208
def blank_field(self, field):
193209
"""
194-
Blank a field
210+
Blank a field.
211+
212+
If the field being blanked has nested children (e.g., a sequence),
213+
also remove all child field UIDs from the internal lookup since they
214+
become inaccessible after blanking the parent.
195215
"""
196216
# Returns the parent, and a DataElement (indexes into parent by tag)
197217
parent, desired = self.get_nested_field(field, return_parent=True)
198218
if parent and desired in parent:
199219
parent[desired].value = None
220+
# Also remove any child fields that were nested under this field
221+
for child_uid in self.get_child_fields(field):
222+
self.fields.remove(child_uid)
200223

201224
def replace_field(self, field, value):
202225
"""

deid/tests/test_action_interaction.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,6 +1865,82 @@ def test_remove_remove_should_remove(self):
18651865
with self.assertRaises(KeyError):
18661866
_ = outputfile[field].value
18671867

1868+
def test_remove_parent_tag_replace_child_with_no_conflict(self):
1869+
"""RECIPE RULE
1870+
REMOVE (0032,1064) Requested Procedure Code Sequence
1871+
REPLACE (0008,0104) "Anonymized"
1872+
"""
1873+
print(
1874+
"Test REMOVE standard sequence tag and REPLACE child tag should not cause conflict"
1875+
)
1876+
dicom_file = get_file(self.dataset)
1877+
1878+
field1 = "(0032,1064)"
1879+
action1 = "REMOVE"
1880+
action2 = "REPLACE"
1881+
value2 = "Anonymized"
1882+
field2 = "(0008,0104)"
1883+
field_dicom1 = "00321064"
1884+
field_dicom2 = "00080104"
1885+
1886+
actions = [
1887+
{"action": action1, "field": field1},
1888+
{"action": action2, "field": field2, "value": value2},
1889+
]
1890+
recipe = create_recipe(actions)
1891+
1892+
result = replace_identifiers(
1893+
dicom_files=dicom_file,
1894+
deid=recipe,
1895+
save=True,
1896+
remove_private=False,
1897+
strip_sequences=False,
1898+
)
1899+
outputfile = utils.dcmread(result[0])
1900+
1901+
self.assertEqual(1, len(result))
1902+
with self.assertRaises(KeyError):
1903+
_ = outputfile[field_dicom1].value
1904+
_ = outputfile[field_dicom1][0][field_dicom2].value
1905+
1906+
def test_blank_parent_tag_remove_child_with_no_conflict(self):
1907+
"""RECIPE RULE
1908+
BLANK (0032,1064) Requested Procedure Code Sequence
1909+
REPLACE (0008,0104) "Anonymized"
1910+
"""
1911+
print(
1912+
"Test BLANK standard sequence tag and REPLACE child tag should not cause conflict"
1913+
)
1914+
dicom_file = get_file(self.dataset)
1915+
1916+
field1 = "(0032,1064)"
1917+
action1 = "BLANK"
1918+
action2 = "REPLACE"
1919+
value2 = "Anonymized"
1920+
field2 = "(0008,0104)"
1921+
field_dicom1 = "00321064"
1922+
field_dicom2 = "00080104"
1923+
1924+
actions = [
1925+
{"action": action1, "field": field1},
1926+
{"action": action2, "field": field2, "value": value2},
1927+
]
1928+
recipe = create_recipe(actions)
1929+
1930+
result = replace_identifiers(
1931+
dicom_files=dicom_file,
1932+
deid=recipe,
1933+
save=True,
1934+
remove_private=False,
1935+
strip_sequences=False,
1936+
)
1937+
outputfile = utils.dcmread(result[0])
1938+
1939+
self.assertEqual(1, len(result))
1940+
self.assertEqual(len(outputfile[field_dicom1].value), 0)
1941+
with self.assertRaises(IndexError):
1942+
_ = outputfile[field_dicom1][0][field_dicom2].value
1943+
18681944

18691945
if __name__ == "__main__":
18701946
unittest.main()

deid/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
__copyright__ = "Copyright 2016-2025, Vanessa Sochat"
33
__license__ = "MIT"
44

5-
__version__ = "0.4.8"
5+
__version__ = "0.4.9"
66
AUTHOR = "Vanessa Sochat"
77
AUTHOR_EMAIL = "[email protected]"
88
NAME = "deid"

0 commit comments

Comments
 (0)