@@ -96,6 +96,7 @@ def fit(self, X, y, groups=None):
             self.model = None
         sparse = sp.issparse(X)
         if sparse is False:
+            # potential bug: csr_matrix ignores all zero values in X
             X = sp.csr_matrix(X)
         X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
 
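For context on the "potential bug" comment above: scipy.sparse.csr_matrix stores only non-zero entries, so once a dense matrix is converted, explicit zero feature values are no longer distinguishable from absent (missing) values for the downstream tree builder. A minimal standalone sketch of that behaviour (not part of the diff):

import numpy as np
import scipy.sparse as sp

X = np.array([[0.0, 1.5],
              [2.0, 0.0]])
X_csr = sp.csr_matrix(X)

# Only the two non-zero entries are stored; the explicit zeros are dropped,
# so a learner that treats unstored entries as "missing" never sees them as 0.
print(X_csr.nnz)        # 2
print(X_csr.toarray())  # zeros reappear when densifying, but are not stored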
@@ -182,6 +183,52 @@ def predict(self, X, groups=None):
         predict_label = [self.predict_label_ptr[index] for index in range(0, X.shape[0])]
         self.predict_label = np.asarray(predict_label)
         return self.predict_label
+
+    def predict_leaf(self, X, groups=None):
+        if self.model is None:
+            print("Please train the model first or load model from file!")
+            raise ValueError
+        sparse = sp.isspmatrix(X)
+        if sparse is False:
+            X = sp.csr_matrix(X)
+        X.data = np.asarray(X.data, dtype=np.float32, order='C')
+        X.sort_indices()
+        data = X.data.ctypes.data_as(POINTER(c_float))
+        indices = X.indices.ctypes.data_as(POINTER(c_int32))
+        indptr = X.indptr.ctypes.data_as(POINTER(c_int32))
+        if self.objective != 'multi:softprob':
+            self.predict_label_ptr = (c_float * X.shape[0])()
+        else:
+            temp_size = X.shape[0] * self.num_class
+            self.predict_label_ptr = (c_float * temp_size)()
+        if self.group_label is not None:
+            group_label = (c_float * len(self.group_label))()
+            group_label[:] = self.group_label
+        else:
+            group_label = None
+        in_groups, num_groups = self._construct_groups(groups)
+        ins2leaf_c = (c_int32 * (X.shape[0] * self.n_trees))()
+        fedtree.predict_leaf(
+            X.shape[0],
+            data,
+            indptr,
+            indices,
+            self.predict_label_ptr,
+            byref(self.model),
+            self.n_trees,
+            self.tree_per_iter,
+            self.objective.encode('utf-8'),
+            self.num_class,
+            c_float(self.learning_rate),
+            group_label,
+            in_groups,
+            ins2leaf_c,
+            num_groups, self.verbose, self.bagging,
+        )
+        self.ins2leaf = np.array([ins2leaf_c[i] for i in range(X.shape[0] * self.n_trees)])
+        # predict_label = [self.predict_label_ptr[index] for index in range(0, X.shape[0])]
+        # self.predict_label = np.asarray(predict_label)
+        return self.ins2leaf
 
     def predict_proba(self, X, groups=None):
         if self.model is None:
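A hedged usage sketch for the new predict_leaf method. The FLClassifier class appears later in this file; the constructor arguments, import path, and data used here are assumptions for illustration. Per the method body above, the returned array holds one leaf index per (instance, tree) pair, flattened to length X.shape[0] * n_trees:

import numpy as np
from fedtree import FLClassifier  # import path is an assumption

X_train = np.random.rand(100, 10)
y_train = np.random.randint(0, 2, 100)
X_test = np.random.rand(20, 10)

clf = FLClassifier(n_trees=10)  # constructor arguments are assumed
clf.fit(X_train, y_train)

leaves = clf.predict_leaf(X_test)
print(leaves.shape)  # (X_test.shape[0] * clf.n_trees,), per the method body above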
@@ -235,7 +282,6 @@ def predict_proba(self, X, groups=None):
         return self.predict_proba
 
 
-
     def save_model(self, model_path):
         if self.model is None:
             print("Please train the model first or load model from file!")
@@ -350,6 +396,75 @@ def cv(self, X, y, folds=None, nfold=5, shuffle=True, seed=0):
             print("mean test RMSE:%.6f+%.6f" % (statistics.mean(test_score_list), statistics.stdev(test_score_list)))
         return self.eval_res
 
+    def centralize_train_a_subtree(self, X, y, n_layer, input_gradient_g=None, input_gradient_h=None, groups=None):
+        n_ins = len(X)
+        if self.model is not None:
+            fedtree.model_free(byref(self.model))
+            self.model = None
+        sparse = sp.issparse(X)
+        if sparse is False:
+            # potential bug: csr_matrix ignores all zero values in X
+            X = sp.csr_matrix(X)
+        X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
+
+        X.data = np.asarray(X.data, dtype=np.float32, order='C')
+        X.sort_indices()
+        data = X.data.ctypes.data_as(POINTER(c_float))
+        indices = X.indices.ctypes.data_as(POINTER(c_int32))
+        indptr = X.indptr.ctypes.data_as(POINTER(c_int32))
+        y = np.asarray(y, dtype=np.float32, order='C')
+        label = y.ctypes.data_as(POINTER(c_float))
+        in_groups, num_groups = self._construct_groups(groups)
+        group_label = (c_float * len(set(y)))()
+        n_class = (c_int * 1)()
+        n_class[0] = self.num_class
+        tree_per_iter_ptr = (c_int * 1)()
+        self.model = (c_long * 1)()
+        n_max_node = pow(2, n_layer)
+        # needs to represent instance ID as int
+        insid_list = (c_int * n_ins)()
+        n_ins_list = (c_int * n_max_node)()
+        gradient_g_list = (c_float * n_ins)()
+        gradient_h_list = (c_float * n_ins)()
+        n_node = (c_int * 1)()
+        nodeid_list = (c_int * n_max_node)()
+        input_gradient_g = np.asarray(input_gradient_g, dtype=np.float32, order='C')
+        input_g = input_gradient_g.ctypes.data_as(POINTER(c_float))
+        input_gradient_h = np.asarray(input_gradient_h, dtype=np.float32, order='C')
+        input_h = input_gradient_h.ctypes.data_as(POINTER(c_float))
+        fedtree.centralize_train_a_subtree(c_float(self.variance), c_float(self.privacy_budget),
+                                           self.max_depth, self.n_trees, c_float(self.min_child_weight), c_float(self.lambda_ft), c_float(self.gamma), c_float(self.column_sampling_rate),
+                                           self.verbose, self.bagging, self.n_parallel_trees, c_float(self.learning_rate), self.objective.encode('utf-8'), n_class, self.n_device, self.max_num_bin,
+                                           self.seed, c_float(self.ins_bagging_fraction), self.reorder_label, c_float(self.constant_h),
+                                           X.shape[0], data, indptr, indices, label, self.tree_method, byref(self.model), tree_per_iter_ptr, group_label,
+                                           in_groups, num_groups, n_layer, insid_list, n_ins_list, gradient_g_list, gradient_h_list, n_node, nodeid_list, input_g, input_h)
+        self.num_class = n_class[0]
+        self.tree_per_iter = tree_per_iter_ptr[0]
+        self.group_label = [group_label[idx] for idx in range(len(set(y)))]
+
+        self.insid_list = [insid_list[i] for i in range(n_ins)]
+        self.n_ins_list = [n_ins_list[i] for i in range(n_node[0])]
+        self.gradient_g_list = [gradient_g_list[i] for i in range(n_ins)]
+        self.gradient_h_list = [gradient_h_list[i] for i in range(n_ins)]
+        self.n_node = n_node[0]
+        self.nodeid_list = [nodeid_list[i] for i in range(n_node[0])]
+        if self.model is None:
+            print("The model returned is empty!")
+            exit()
+
+        return self
+
+    def update_a_layer_cpp(self, X, ins, nins, gradient_g, gradient_h, n_node, lamb):
+        c_x = np.asarray(X, dtype=np.int32).ctypes.data_as(POINTER(c_int32))
+        c_ins = np.asarray(ins, dtype=np.int32).ctypes.data_as(POINTER(c_int32))
+        c_nins = np.asarray(nins, dtype=np.int32).ctypes.data_as(POINTER(c_int32))
+        c_gradient_g = np.asarray(gradient_g, dtype=np.float32).ctypes.data_as(POINTER(c_float))
+        c_gradient_h = np.asarray(gradient_h, dtype=np.float32).ctypes.data_as(POINTER(c_float))
+        leaf_val = (c_float * (n_node * 2))()
+        fedtree.update_a_layer_with_flag(c_x, c_ins, c_nins, c_gradient_g, c_gradient_h, n_node, leaf_val)
+        self.leaf_val = [leaf_val[i] for i in range(n_node * 2)]
+
+
 class FLClassifier(FLModel, fedtreeClassifierBase):
     _impl = 'classifier'
 
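A hedged sketch of how the two helpers added in this commit might be driven. The class name, constructor arguments, and import path are assumptions, and the gradients are dummy values. Per the code above, centralize_train_a_subtree takes externally supplied first- and second-order gradients, trains a subtree with n_layer layers, and exposes the per-instance and per-node state (insid_list, n_ins_list, gradient_g_list, gradient_h_list, n_node, nodeid_list) on the estimator:

import numpy as np
from fedtree import FLClassifier  # import path is an assumption

X = np.random.rand(200, 8)
y = np.random.randint(0, 2, 200)

# externally supplied gradients (dummy values for illustration)
g = np.random.rand(200).astype(np.float32)
h = np.ones(200, dtype=np.float32)

model = FLClassifier(n_trees=1, max_depth=6)  # constructor arguments are assumed
model.centralize_train_a_subtree(X, y, n_layer=3, input_gradient_g=g, input_gradient_h=h)

print(model.n_node)           # number of nodes returned for the trained subtree
print(len(model.insid_list))  # one entry per training instance

update_a_layer_cpp consumes per-node instance lists and gradients in the same flattened layout and fills self.leaf_val with 2 * n_node leaf values.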