@@ -1204,6 +1204,9 @@ def __init__(self, root, omw_reader):
             assert int(index) == i
             self._lexnames.append(lexname)
 
+        # Build a set of adjective satellite offsets
+        self._scan_satellites()
+
         # Load the indices for lemmas and synset offsets
         self._load_lemma_pos_offset_map()
 
@@ -1324,7 +1327,7 @@ def _load_lang_data(self, lang):
             self.add_omw()
 
         if lang not in self.langs():
-            raise WordNetError("Language is not supported.")
+            raise WordNetError(f"Language {lang} is not supported.")
 
         if self._exomw_reader and lang not in self.omw_langs:
             reader = self._exomw_reader
@@ -1379,6 +1382,30 @@ def langs(self):
13791382 """return a list of languages supported by Multilingual Wordnet"""
13801383 return list (self .provenances .keys ())
13811384
1385+ def _scan_satellites (self ):
1386+ """
1387+ Scans the adjective data file and populates self.satellite_offsets with all adjective satellite synset offsets.
1388+
1389+ This method reads the adjective data file associated with the corpus reader,
1390+ identifies synsets of type 's' (adjective satellites), and adds their offsets
1391+ to the self.satellite_offsets set. The method does not return a value.
1392+ """
1393+ adj_data_file = self ._data_file (ADJ )
1394+ satellite_offsets = set ()
1395+ adj_data_file .seek (0 )
1396+ for line in adj_data_file :
1397+ if not line .strip () or line .startswith (" " ):
1398+ continue
1399+ fields = line .strip ().split ()
1400+ if len (fields ) < 3 :
1401+ continue
1402+ synset_offset = fields [0 ]
1403+ synset_type = fields [2 ]
1404+ if synset_type == "s" :
1405+ satellite_offsets .add (int (synset_offset ))
1406+ adj_data_file .seek (0 ) # Reset if needed elsewhere
1407+ self .satellite_offsets = satellite_offsets
1408+
     def _load_lemma_pos_offset_map(self):
         for suffix in self._FILEMAP.values():
             # parse each line of the file (ignoring comment lines)
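
Note on the scan above: it relies on the standard WordNet data.adj record layout, in which each data line begins with "synset_offset lex_filenum ss_type ..." and the license header lines start with whitespace; satellite synsets carry the ss_type "s". A minimal standalone sketch of the same pass, using a placeholder path to an extracted data.adj file instead of the reader's cached file handle:

    # Standalone sketch of the satellite scan; "data.adj" is a placeholder path.
    def scan_satellite_offsets(path="data.adj"):
        offsets = set()
        with open(path, encoding="utf8") as fh:
            for line in fh:
                # Skip blanks and the license header (lines starting with a space).
                if not line.strip() or line.startswith(" "):
                    continue
                fields = line.split()
                # Data lines read "synset_offset lex_filenum ss_type ...";
                # an ss_type of "s" marks an adjective satellite.
                if len(fields) >= 3 and fields[2] == "s":
                    offsets.add(int(fields[0]))
        return offsets
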
@@ -1425,8 +1452,15 @@ def _next_token():
                 # map lemmas and parts of speech to synsets
                 self._lemma_pos_offset_map[lemma][pos] = synset_offsets
                 if pos == ADJ:
-                    # Duplicate all adjectives indiscriminately?:
-                    self._lemma_pos_offset_map[lemma][ADJ_SAT] = synset_offsets
+                    # index.adj uses only the ADJ pos, so identify ADJ_SAT using the satellites set
+                    satellite_offsets = [
+                        # Keep the ordering from index.adj
+                        offset
+                        for offset in synset_offsets
+                        if offset in self.satellite_offsets
+                    ]
+                    # Duplicate only a (possibly empty) list of real satellites
+                    self._lemma_pos_offset_map[lemma][ADJ_SAT] = satellite_offsets
 
     def _load_exception_map(self):
         # load the exception file data into memory
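
With the satellite filter in place, a lookup restricted to the ADJ_SAT ("s") part of speech should return only synsets whose own type is "s", rather than every adjective synset indexed for the lemma. A rough check of that intended behaviour through the public reader API (assuming a build with this change and the wordnet data downloaded via nltk.download("wordnet")):

    # Rough check of the intended behaviour; not part of this change.
    from nltk.corpus import wordnet as wn

    for word in ("quick", "beautiful", "dry"):
        sat_synsets = wn.synsets(word, pos="s")
        # Every synset returned for pos="s" should itself be an adjective satellite.
        assert all(ss.pos() == "s" for ss in sat_synsets), word
        print(word, [ss.name() for ss in sat_synsets])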