11import csv
22from collections import defaultdict
3- from functools import cmp_to_key
43from io import IOBase , StringIO
5- from typing import Dict , Iterable , Set
4+ from typing import Any , Dict , Iterable , Set
65
76from cassis import Cas
8- from cassis .typesystem import FEATURE_BASE_NAME_SOFA , TYPE_NAME_ANNOTATION , FeatureStructure , Type , is_array
7+ from cassis .typesystem import FEATURE_BASE_NAME_SOFA , TYPE_NAME_ANNOTATION , FeatureStructure , Type , is_annotation , is_array
98
109_EXCLUDED_FEATURES = {FEATURE_BASE_NAME_SOFA }
1110_NULL_VALUE = "<NULL>"
@@ -74,7 +73,7 @@ def _render_feature_structure(
7473) -> []:
7574 row_data = [fs_id_to_anchor .get (fs .xmiID )]
7675
77- if max_covered_text > 0 and _is_annotation_fs (fs ):
76+ if max_covered_text > 0 and is_annotation (fs ):
7877 covered_text = fs .get_covered_text ()
7978 if covered_text and len (covered_text ) >= max_covered_text :
8079 prefix = covered_text [0 : (max_covered_text // 2 )]
@@ -143,7 +142,19 @@ def _generate_anchors(
143142 for t in types_sorted :
144143 type_ = cas .typesystem .get_type (t )
145144 feature_structures = all_feature_structures_by_type [type_ .name ]
146- feature_structures .sort (key = cmp_to_key (lambda a , b : _compare_fs (type_ , a , b )))
145+ # Sort deterministically using a stable key function. We avoid using
146+ # the comparator-based approach to prevent unpredictable comparisons
147+ # between mixed types during lexicographic tuple comparisons.
148+ feature_structures .sort (
149+ key = lambda fs : (
150+ 0 ,
151+ fs .begin ,
152+ fs .end ,
153+ str (_feature_structure_hash (type_ , fs )),
154+ )
155+ if is_annotation (fs )
156+ else (1 , None , None , str (_feature_structure_hash (type_ , fs )))
157+ )
147158
148159 for fs in feature_structures :
149160 add_index_mark = mark_indexed and fs in indexed_feature_structures
@@ -159,7 +170,7 @@ def _generate_anchors(
159170def _generate_anchor (fs : FeatureStructure , add_index_mark : bool ) -> str :
160171 anchor = fs .type .name .rsplit ("." , 2 )[- 1 ] # Get the short type name (no package)
161172
162- if _is_annotation_fs (fs ):
173+ if is_annotation (fs ):
163174 anchor += f"[{ fs .begin } -{ fs .end } ]"
164175
165176 if add_index_mark :
@@ -171,7 +182,7 @@ def _generate_anchor(fs: FeatureStructure, add_index_mark: bool) -> str:
171182 return anchor
172183
173184
174- def _is_primitive_value (value : any ) -> bool :
185+ def _is_primitive_value (value : Any ) -> bool :
175186 return type (value ) in (int , float , bool , str )
176187
177188
@@ -182,65 +193,62 @@ def _is_array_fs(fs: FeatureStructure) -> bool:
182193 return is_array (fs .type )
183194
184195
185- def _is_annotation_fs (fs : FeatureStructure ) -> bool :
186- return hasattr (fs , "begin" ) and isinstance (fs .begin , int ) and hasattr (fs , "end" ) and isinstance (fs .end , int )
187-
188-
189- def _compare_fs (type_ : Type , a : FeatureStructure , b : FeatureStructure ) -> int :
190- if a is b :
191- return 0
192-
193- # duck-typing check if something is a annotation - if yes, try sorting by offets
194- fs_a_is_annotation = _is_annotation_fs (a )
195- fs_b_is_annotation = _is_annotation_fs (b )
196- if fs_a_is_annotation != fs_b_is_annotation :
197- return - 1
198- if fs_a_is_annotation and fs_b_is_annotation :
199- begin_cmp = a .begin - b .begin
200- if begin_cmp != 0 :
201- return begin_cmp
202-
203- begin_cmp = b .end - a .end
204- if begin_cmp != 0 :
205- return begin_cmp
206-
207- # Alternative implementation
208- # Doing arithmetics on the hash value as we have done with the offsets does not work because the hashes do not
209- # provide a global order. Hence, we map all results to 0, -1 and 1 here.
210- fs_hash_a = _feature_structure_hash (type_ , a )
211- fs_hash_b = _feature_structure_hash (type_ , b )
212- if fs_hash_a == fs_hash_b :
213- return 0
214- return - 1 if fs_hash_a < fs_hash_b else 1
215-
216-
def _feature_structure_hash(type_: Type, fs: FeatureStructure) -> str:
    """Render the contents of ``fs`` as a string used as a sort tie-breaker.

    Despite the historical name, the result is a plain string, not a numeric
    hash. Rendering rules:

    * ``None`` is rendered as ``_NULL_VALUE``.
    * Values accepted by ``_is_primitive_value`` are rendered with ``str()``.
    * Arrays are rendered as ``[e1,e2,...]``, each element rendered by the
      same rules.
    * Any other feature structure reference is rendered as its type name,
      followed by ``@begin-end`` when ``is_annotation`` reports an annotation.
      References are not followed, so only arrays are traversed recursively.

    Args:
        type_: Type whose ``all_features`` are rendered, in the order that
            ``all_features`` yields them.
        fs: The feature structure to render.

    Returns:
        For an array, its rendered element list. Otherwise the rendered values
        of every feature except the one named ``FEATURE_BASE_NAME_SOFA``,
        joined with ``|``.
    """
    # ids of the arrays on the current rendering path. An array that directly
    # or indirectly contains itself would otherwise recurse without bound.
    arrays_in_progress: Set[int] = set()

    def _render_val(v: Any) -> str:
        if v is None:
            return _NULL_VALUE
        if _is_primitive_value(v):
            return str(v)
        if _is_array_fs(v):
            array_id = id(v)
            if array_id in arrays_in_progress:
                return "[...]"
            arrays_in_progress.add(array_id)
            try:
                # Elements are separated by ","; "|" is reserved for separating features.
                return "[" + ",".join(_render_val(e) for e in (v.elements or [])) + "]"
            finally:
                arrays_in_progress.discard(array_id)
        # Feature structure reference: describe it by type (and offsets) only.
        try:
            if is_annotation(v):
                return f"{v.type.name}@{v.begin}-{v.end}"
            return f"{v.type.name}"
        except AttributeError:
            # Not a feature structure after all; fall back to its string form.
            return str(v)

    if _is_array_fs(fs):
        # Pass the array itself, not its raw element list: _render_val has no
        # branch for a plain Python list and would only reach str(list) via
        # the fallback above.
        return _render_val(fs)

    return "|".join(
        _render_val(getattr(fs, feature.name))
        for feature in type_.all_features
        if feature.name != FEATURE_BASE_NAME_SOFA
    )
227+
228+
229+ def _normalize_feature_value (value : Any ):
230+ """Return a stable, comparable representation for a feature value.
231+
232+ Primitives are returned as-is. Feature structure references are normalized
233+ to a tuple containing the referenced type name and offsets if the target
234+ is an annotation. Arrays are represented as tuples of normalized elements.
235+ """
236+ # Use tagged tuples to guarantee consistent types and deterministic
237+ # ordering during comparisons. This avoids runtime TypeErrors when
238+ # different kinds of values (None, tuple, primitive) would otherwise
239+ # be compared directly.
240+ if value is None :
241+ return ("N" ,)
242+ if type (value ) in (int , float , bool , str ):
243+ return ("P" , value )
244+ if _is_array_fs (value ):
245+ return ("A" ,) + tuple (_normalize_feature_value (e ) for e in (value .elements or []))
246+ # Feature structure reference
247+ try :
248+ if is_annotation (value ):
249+ return ("FS" , value .type .name , value .begin , value .end )
234250 else :
235- hash_ = _feature_value_hash (feature_value , hash_ )
236- return hash_
237-
238-
239- def _feature_value_hash (feature_value : any , hash_ : int ):
240- # Note we do not recurse further into arrays here because that could lead to endless loops!
241- if type (feature_value ) in (int , float , bool , str ):
242- return hash_ + hash (feature_value )
243- else :
244- # If we get here, it is a feature structure reference... we cannot really recursively
245- # go into it to calculate a recursive hash... so we just check if the value is non-null
246- return hash_ * (- 1 if feature_value is None else 1 )
251+ return ("FS" , value .type .name )
252+ except Exception :
253+ # Fallback: string representation
254+ return ("FS" , getattr (getattr (value , "type" , None ), "name" , str (value )))
0 commit comments