1515 FEATURE_BASE_NAME_HEAD ,
1616 FEATURE_BASE_NAME_LANGUAGE ,
1717 TYPE_NAME_DOCUMENT_ANNOTATION ,
18+ TYPE_NAME_ANNOTATION ,
1819 TYPE_NAME_FS_ARRAY ,
1920 TYPE_NAME_FS_LIST ,
2021 TYPE_NAME_SOFA ,
@@ -367,6 +368,78 @@ def add_annotations(self, annotations: Iterable[FeatureStructure]):
367368 """
368369 self .add_all (annotations )
369370
def crop_sofa_string(self, sofa_begin: int, sofa_end: int, overlap: bool = True):
    """Replace the sofa string with the cutout ``[sofa_begin, sofa_end)``.

    Annotations lying completely inside the cutout are kept and shifted into
    the coordinate space of the new sofa string. Annotations that merely
    overlap a cut boundary are kept only if ``overlap`` is true, in which case
    their offsets are clamped to the cutout before being shifted. Every other
    annotation is removed from the current view.

    Feature structures returned by ``select_all()`` that carry no ``begin`` /
    ``end`` offsets (attribute missing or ``None``) are left untouched: they
    are not anchored in the text, so there is nothing to crop.

    Args:
        sofa_begin: The beginning of the cutout sofa.
        sofa_end: The end of the cutout sofa.
        overlap: If true, keeps overlapping annotations and modifies begin and
            end of annotation accordingly.

    Raises:
        ValueError: If the current view has no sofa string, or if the cutout
            indices are invalid (they must satisfy
            ``0 <= sofa_begin < sofa_end <= len(sofa_string)``). In both cases
            nothing has been modified when the error is raised.

    Note:
        Removal performed by this method only removes annotations from the
        current view's index. Feature structures that are removed from the
        view remain in memory and any references from kept annotations to
        those feature structures are left intact. Such transitively referenced
        feature structures will still be discovered by traversal
        (e.g. ``_find_all_fs()``) and included during serialization.

        Only the annotations that are kept have their ``begin``/``end``
        offsets adjusted to the new sofa coordinate space. Feature structures
        that are removed from the view are not re-anchored; they keep their
        original ``begin``/``end`` values. As a result, serializers may
        attempt to transcode offsets that fall outside the new sofa range; the
        offset converter will emit ``UserWarning`` messages for unmappable
        offsets but will not raise an exception. If you require a cascading
        delete or re-anchoring of transitively referenced feature structures,
        perform an explicit graph traversal and removal.
    """
    text = self.sofa_string
    if text is None:
        raise ValueError("Cannot crop sofa string: CAS has no sofa string for the current view")

    # Validate before touching any state so a bad call leaves the CAS intact.
    if not 0 <= sofa_begin < sofa_end <= len(text):
        raise ValueError(f"Invalid indices for begin { sofa_begin } and end { sofa_end } ")

    self.sofa_string = text[sofa_begin:sofa_end]
    view = self._current_view

    # Iterate over a snapshot: the index is modified while we walk it.
    for fs in list(self.select_all()):
        begin = getattr(fs, "begin", None)
        end = getattr(fs, "end", None)
        if begin is None or end is None:
            # Not anchored in the text (e.g. a plain, non-annotation feature
            # structure): nothing to shift or drop.
            continue

        inside = sofa_begin <= begin and end <= sofa_end
        crosses_cut = begin < sofa_end and sofa_begin < end
        if not (inside or (overlap and crosses_cut)):
            # Completely outside the cutout, or overlapping while overlap
            # handling is switched off.
            self.remove(fs)
            continue

        # The index is ordered by offsets, so the annotation has to be taken
        # out before its offsets change and re-inserted afterwards.
        view.remove_annotation_from_index(fs)
        # Clamp to the cutout, then translate to the new origin. For fully
        # contained annotations the clamp is a no-op.
        fs.begin = max(begin, sofa_begin) - sofa_begin
        fs.end = min(end, sofa_end) - sofa_begin
        view.add_annotation_to_index(fs)
442+
370443 def remove (self , annotation : FeatureStructure ):
371444 """Removes an annotation from an index. This throws if the
372445 annotation was not present.
@@ -386,6 +459,38 @@ def remove_annotation(self, annotation: FeatureStructure):
386459 """
387460 self .remove (annotation )
388461
def remove_annotations_in_range(self, begin: int, end: int, type_: Optional[Union[Type, str]] = None):
    """Remove the annotations that lie within ``[begin, end]`` of the sofa string.

    An annotation is removed when ``begin <= annotation.begin``,
    ``annotation.end <= end`` and ``annotation.begin < annotation.end`` all
    hold, i.e. zero-width annotations are never removed by this method.

    Args:
        begin: The beginning of the cutting interval.
        end: The end of the cutting interval.
        type_: The type, or the name of the type, whose instances are
            considered. If omitted, every feature structure in the view that
            is an instance of the annotation base type is considered.

    Raises:
        ValueError: If the current view has no sofa string, or if the range
            indices are invalid (they must satisfy
            ``0 <= begin < end <= len(sofa_string)``).
    """
    if type_ is not None:
        candidates = self.select(type_)
    else:
        # Without an explicit type, restrict the search to annotation
        # instances: other feature structures in the view have no
        # begin/end offsets to compare against.
        typesystem = self.typesystem
        candidates = [
            fs for fs in self.select_all() if typesystem.is_instance_of(fs.type, TYPE_NAME_ANNOTATION)
        ]

    text = self.sofa_string
    if text is None:
        raise ValueError("Cannot remove annotations by range: CAS has no sofa string for the current view")

    if not (0 <= begin < end <= len(text)):
        raise ValueError(f"Invalid indices for begin { begin } and end { end } ")

    # Walk a frozen copy, since removing entries changes the index underneath.
    for candidate in tuple(candidates):
        if not (begin <= candidate.begin < candidate.end <= end):
            continue
        self.remove(candidate)
493+
389494 @deprecation .deprecated (details = "Use annotation.get_covered_text()" )
390495 def get_covered_text (self , annotation : FeatureStructure ) -> str :
391496 """Gets the text that is covered by `annotation`.
0 commit comments