1818
1919from traceback import format_exception_only
2020from collections import namedtuple , OrderedDict
21- from itertools import chain , count
21+ from itertools import chain , count , starmap
2222from typing import List , Dict , Any
2323
2424import numpy as np
3232from orangewidget .utils .combobox import ComboBoxSearch
3333
3434import Orange
35+ from Orange .data import Variable , Table , Value , Instance
3536from Orange .data .util import get_unique_names
3637from Orange .widgets import gui
3738from Orange .widgets .settings import ContextSetting , DomainContextHandler
38- from Orange .widgets .utils import itemmodels , vartype
39+ from Orange .widgets .utils import (
40+ itemmodels , vartype , ftry , unique_everseen as unique
41+ )
3942from Orange .widgets .utils .sql import check_sql_input
4043from Orange .widgets import report
4144from Orange .widgets .utils .widgetpreview import WidgetPreview
4245from Orange .widgets .widget import OWWidget , Msg , Input , Output
4346
47+
# Base record for a constructed feature: its name and the expression
# (source text) used to compute it.
FeatureDescriptor = namedtuple("FeatureDescriptor", ["name", "expression"])
4650
@@ -729,11 +733,14 @@ def duplicateFeature(self):
729733
730734 @staticmethod
def check_attrs_values(attr, data):
    """Return the name of the first variable holding an out-of-range value.

    For every variable in ``attr`` the corresponding column of ``data`` is
    inspected; a non-missing entry that is greater than or equal to
    ``len(var.values)`` does not index a declared value.

    Parameters
    ----------
    attr : iterable
        Variables to check; each must expose ``values`` and ``name``.
    data
        Object providing ``get_column_view(var)`` that returns a
        ``(column, flag)`` pair.

    Returns
    -------
    str or None
        Name of the first offending variable, or ``None`` if every
        column is within range.
    """
    for var in attr:
        col, _ = data.get_column_view(var)
        # np.isnan rejects object-dtype arrays, so normalise to float first.
        col = np.asarray(col, dtype=float)
        # Missing entries (NaN) are ignored rather than compared.
        known = col[~np.isnan(col)]
        if (known >= len(var.values)).any():
            return var.name
    return None
738745
739746 def _validate_descriptors (self , desc ):
@@ -1162,25 +1169,59 @@ def __init__(self, expression, args, extra_env=None, cast=None, use_values=False
11621169 self .mask_exceptions = True
11631170 self .use_values = use_values
11641171
def __call__(self, table, *_):
    """Evaluate the expression on a whole table or on a single instance.

    A ``Table`` argument is processed by ``__call_table``; any other
    argument is treated as a single instance and handled by
    ``__call_instance``. Extra positional arguments are accepted and
    ignored.
    """
    if isinstance(table, Table):
        return self.__call_table(table)
    return self.__call_instance(table)
1177+
def __call_table(self, table):
    """Evaluate ``self.func`` once per row of ``table``.

    One column is extracted per entry of ``self.args`` and the columns
    are zipped into per-row argument tuples.

    Returns a list with one result per row, each passed through
    ``self.cast`` when a cast is set. If column extraction raises
    ``ValueError`` and ``self.mask_exceptions`` is true, an all-NaN
    NumPy array of length ``len(table)`` is returned instead (note the
    different return type); otherwise the error propagates.
    """
    try:
        cols = [self.extract_column(table, var) for _, var in self.args]
    except ValueError:
        if self.mask_exceptions:
            return np.full(len(table), np.nan)
        raise

    # With no arguments zip(*cols) would be empty, so the function is
    # explicitly called with an empty argument tuple for every row.
    args = zip(*cols) if cols else [()] * len(table)

    func = self.func
    if self.mask_exceptions:
        # The user's expression can fail in arbitrary ways; ftry is
        # passed Exception and NaN so that failures are masked.
        func = ftry(func, Exception, np.nan)
    y = list(starmap(func, args))

    if self.cast is not None:
        cast = self.cast
        y = [cast(y_) for y_ in y]
    return y
1200+
def __call_instance(self, instance: "Instance"):
    """Evaluate the expression on a single instance.

    The instance's ``x``, ``y`` and ``metas`` are each wrapped in an
    outer array to form a one-row ``Table`` over ``instance.domain``;
    the table is evaluated with ``__call_table`` and the first (only)
    result is returned.
    """
    table = Table.from_numpy(
        instance.domain,
        np.array([instance.x]),
        np.array([instance.y]),
        np.array([instance.metas]),
    )
    return self.__call_table(table)[0]
1209+
def extract_column(self, table: "Table", var: "Variable"):
    """Return the column of ``var`` in ``table`` as a list of arguments.

    The representation depends on the variable and on ``self.use_values``:

    * string variables: each entry mapped through ``var.str_val``;
    * discrete variables when ``use_values`` is false: the entry of
      ``var.values`` at each code, with ``None`` for non-finite
      (missing) codes;
    * time variables, or any remaining variable when ``use_values`` is
      true: the result of ``Value._as_values``;
    * otherwise: the raw column converted with ``tolist()``.
    """
    data, _ = table.get_column_view(var)
    if var.is_string:
        return list(map(var.str_val, data))
    if var.is_discrete and not self.use_values:
        n_values = len(var.values)
        # Extra trailing slot holds None, the stand-in for missing codes.
        lookup = np.array([*var.values, None], dtype=object)
        codes = np.asarray(data, dtype=float)
        # Redirect non-finite codes to the None slot *before* the integer
        # cast; casting NaN/inf to int is undefined and warns in NumPy.
        idx = np.where(np.isfinite(codes), codes, n_values).astype(int)
        return lookup[idx].tolist()
    if var.is_time or self.use_values:
        # time always needs Values due to str(val) formatting
        return Value._as_values(var, data.tolist())  # pylint: disable=protected-access
    return data.tolist()
11841225
11851226 def __reduce__ (self ):
11861227 return type (self ), (self .expression , self .args ,
@@ -1190,15 +1231,5 @@ def __repr__(self):
11901231 return "{0.__name__}{1!r}" .format (* self .__reduce__ ())
11911232
11921233
1193- def unique (seq ):
1194- seen = set ()
1195- unique_el = []
1196- for el in seq :
1197- if el not in seen :
1198- unique_el .append (el )
1199- seen .add (el )
1200- return unique_el
1201-
1202-
# Script entry point: preview the widget on the "iris" data set.
if __name__ == "__main__":  # pragma: no cover
    WidgetPreview(OWFeatureConstructor).run(Orange.data.Table("iris"))
0 commit comments