@@ -1204,6 +1204,9 @@ def __init__(self, root, omw_reader):
12041204 assert int (index ) == i
12051205 self ._lexnames .append (lexname )
12061206
1207+ # Build a set of adjective satellite offsets
1208+ self ._scan_satellites ()
1209+
12071210 # Load the indices for lemmas and synset offsets
12081211 self ._load_lemma_pos_offset_map ()
12091212
@@ -1379,8 +1382,24 @@ def langs(self):
13791382 """return a list of languages supported by Multilingual Wordnet"""
13801383 return list (self .provenances .keys ())
13811384
1382- def _load_lemma_pos_offset_map (self ):
def _scan_satellites(self):
    """Collect the offsets of every adjective satellite synset.

    Makes a single pass over the adjective data file returned by
    ``self._data_file(ADJ)`` and stores the result in
    ``self.satellite_offsets`` as a set of ints, so that callers can
    test an offset for membership instead of seeking into the file
    once per offset.
    """
    adj_data_file = self._data_file(ADJ)
    satellite_offsets = set()
    adj_data_file.seek(0)
    for line in adj_data_file:
        # Lines that begin with a space are not synset records
        # (presumably the licence header -- confirm against the data file).
        if line.startswith(" "):
            continue
        # Only the leading fields are read: field 0 is the synset offset
        # and field 2 the synset type.  Capping the split avoids
        # tokenising the remainder of the record.
        fields = line.split(None, 3)
        if len(fields) < 3:
            # Blank or truncated line: nothing to classify.
            continue
        # Compare against the ADJ_SAT constant, as the rest of the reader
        # does, rather than a hard-coded type letter.
        if fields[2] == ADJ_SAT:
            satellite_offsets.add(int(fields[0]))
    # The stream is rewound afterwards in case other code reads it
    # from the start.
    adj_data_file.seek(0)
    self.satellite_offsets = satellite_offsets
1401+
1402+ def _load_lemma_pos_offset_map (self ):
13841403 for suffix in self ._FILEMAP .values ():
13851404 # parse each line of the file (ignoring comment lines)
13861405 with self .open ("index.%s" % suffix ) as fp :
@@ -1426,15 +1445,15 @@ def _next_token():
14261445 # map lemmas and parts of speech to synsets
14271446 self ._lemma_pos_offset_map [lemma ][pos ] = synset_offsets
14281447 if pos == ADJ :
1429- sat_offsets = []
1430- for offset in synset_offsets :
1431- adj_data_file .seek (offset )
1432- # Check in data.adj if offset pos is ADJ_SAT
1433- if adj_data_file .readline ()[12 :13 ] == ADJ_SAT :
1434- sat_offsets .append (offset )
1435- if sat_offsets :
1448+ # Filter adjective satellites:
1449+ satellite_offsets = [
1450+ of for of in synset_offsets if of in self .satellite_offsets
1451+ ]
1452+ if satellite_offsets :
14361453 # Duplicate only real satellites
1437- self ._lemma_pos_offset_map [lemma ][ADJ_SAT ] = sat_offsets
1454+ self ._lemma_pos_offset_map [lemma ][
1455+ ADJ_SAT
1456+ ] = satellite_offsets
14381457
14391458 def _load_exception_map (self ):
14401459 # load the exception file data into memory
0 commit comments