diff --git a/CHANGELOG.md b/CHANGELOG.md index 1167134..fd86bfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: Referenced versions in headers are tagged on Github, in parentheses are for pypi. ## [vxx](https://github.com/pydicom/deid/tree/master) (master) +- Fix field removal and blanking to clean up child UID references [#293](https://github.com/pydicom/deid/pull/293) (0.4.9) - Fix UID lookup for nested sequence fields in DICOM datasets [#292](https://github.com/pydicom/deid/pull/292) (0.4.8) - Allow saving with a compressed transfer syntax [#290](https://github.com/pydicom/deid/pull/290) (0.4.7) - Improve performance of header deid with caching and lookup tables [#289](https://github.com/pydicom/deid/pull/289) diff --git a/deid/dicom/parser.py b/deid/dicom/parser.py index 98c097d..438f919 100644 --- a/deid/dicom/parser.py +++ b/deid/dicom/parser.py @@ -176,27 +176,50 @@ def get_nested_field(self, field, return_parent=False): return parent, desired return desired + def get_child_fields(self, field): + """ + Return a list of child field UIDs for a given field. + + This method identifies all field UIDs in self.fields that are nested under the provided field, + based on the UID prefix convention (parent UID + '__'). + It is used to find and remove all child fields when blanking or deleting a parent field (e.g., a sequence). + """ + field_uid_prefix = field.uid + "__" + return [uid for uid in self.fields if uid.startswith(field_uid_prefix)] + def delete_field(self, field): """ Delete a field from the dicom. We do this by way of parsing all nested levels of a tag into actual tags, - and deleting the child node. + and deleting the child node. If the field being deleted has nested children + (e.g., a sequence), also remove all child field UIDs from the internal lookup. """ # Returns the parent, and a DataElement (indexes into parent by tag) parent, desired = self.get_nested_field(field, return_parent=True) if parent and desired in parent: del parent[desired] + # Remove the field itself from the lookup self.fields.remove(field.uid) + # Also remove any child fields that were nested under this field + for child_uid in self.get_child_fields(field): + self.fields.remove(child_uid) def blank_field(self, field): """ - Blank a field + Blank a field. + + If the field being blanked has nested children (e.g., a sequence), + also remove all child field UIDs from the internal lookup since they + become inaccessible after blanking the parent. """ # Returns the parent, and a DataElement (indexes into parent by tag) parent, desired = self.get_nested_field(field, return_parent=True) if parent and desired in parent: parent[desired].value = None + # Also remove any child fields that were nested under this field + for child_uid in self.get_child_fields(field): + self.fields.remove(child_uid) def replace_field(self, field, value): """ diff --git a/deid/tests/test_action_interaction.py b/deid/tests/test_action_interaction.py index 5caf559..ed9a50c 100644 --- a/deid/tests/test_action_interaction.py +++ b/deid/tests/test_action_interaction.py @@ -1865,6 +1865,82 @@ def test_remove_remove_should_remove(self): with self.assertRaises(KeyError): _ = outputfile[field].value + def test_remove_parent_tag_replace_child_with_no_conflict(self): + """RECIPE RULE + REMOVE (0032,1064) Requested Procedure Code Sequence + REPLACE (0008,0104) "Anonymized" + """ + print( + "Test REMOVE standard sequence tag and REPLACE child tag should not cause conflict" + ) + dicom_file = get_file(self.dataset) + + field1 = "(0032,1064)" + action1 = "REMOVE" + action2 = "REPLACE" + value2 = "Anonymized" + field2 = "(0008,0104)" + field_dicom1 = "00321064" + field_dicom2 = "00080104" + + actions = [ + {"action": action1, "field": field1}, + {"action": action2, "field": field2, "value": value2}, + ] + recipe = create_recipe(actions) + + result = replace_identifiers( + dicom_files=dicom_file, + deid=recipe, + save=True, + remove_private=False, + strip_sequences=False, + ) + outputfile = utils.dcmread(result[0]) + + self.assertEqual(1, len(result)) + with self.assertRaises(KeyError): + _ = outputfile[field_dicom1].value + _ = outputfile[field_dicom1][0][field_dicom2].value + + def test_blank_parent_tag_remove_child_with_no_conflict(self): + """RECIPE RULE + BLANK (0032,1064) Requested Procedure Code Sequence + REPLACE (0008,0104) "Anonymized" + """ + print( + "Test BLANK standard sequence tag and REPLACE child tag should not cause conflict" + ) + dicom_file = get_file(self.dataset) + + field1 = "(0032,1064)" + action1 = "BLANK" + action2 = "REPLACE" + value2 = "Anonymized" + field2 = "(0008,0104)" + field_dicom1 = "00321064" + field_dicom2 = "00080104" + + actions = [ + {"action": action1, "field": field1}, + {"action": action2, "field": field2, "value": value2}, + ] + recipe = create_recipe(actions) + + result = replace_identifiers( + dicom_files=dicom_file, + deid=recipe, + save=True, + remove_private=False, + strip_sequences=False, + ) + outputfile = utils.dcmread(result[0]) + + self.assertEqual(1, len(result)) + self.assertEqual(len(outputfile[field_dicom1].value), 0) + with self.assertRaises(IndexError): + _ = outputfile[field_dicom1][0][field_dicom2].value + if __name__ == "__main__": unittest.main() diff --git a/deid/version.py b/deid/version.py index 685994e..b7d3d32 100644 --- a/deid/version.py +++ b/deid/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright 2016-2025, Vanessa Sochat" __license__ = "MIT" -__version__ = "0.4.8" +__version__ = "0.4.9" AUTHOR = "Vanessa Sochat" AUTHOR_EMAIL = "vsoch@users.noreply.github.com" NAME = "deid"