@@ -2,7 +2,7 @@ module LightGBM
22 class Dataset
33 attr_reader :data , :params
44
5- def initialize ( data , label : nil , weight : nil , group : nil , params : nil , reference : nil , used_indices : nil , categorical_feature : "auto" , feature_names : nil )
5+ def initialize ( data , label : nil , weight : nil , group : nil , params : nil , reference : nil , used_indices : nil , categorical_feature : "auto" , feature_name : nil , feature_names : nil )
66 @data = data
77 @label = label
88 @weight = weight
@@ -11,7 +11,7 @@ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference
1111 @reference = reference
1212 @used_indices = used_indices
1313 @categorical_feature = categorical_feature
14- @feature_names = feature_names
14+ @feature_name = feature_name || feature_names || "auto"
1515
1616 construct
1717 end
@@ -24,7 +24,7 @@ def weight
2424 field ( "weight" )
2525 end
2626
27- def feature_names
27+ def feature_name
2828 # must preallocate space
2929 num_feature_names = ::FFI ::MemoryPointer . new ( :int )
3030 out_buffer_len = ::FFI ::MemoryPointer . new ( :size_t )
@@ -48,6 +48,7 @@ def feature_names
4848 # from most recent call (instead of num_features)
4949 str_ptrs [ 0 , num_feature_names . read_int ] . map ( &:read_string )
5050 end
51+ alias_method :feature_names , :feature_name
5152
5253 def label = ( label )
5354 @label = label
@@ -64,12 +65,15 @@ def group=(group)
6465 set_field ( "group" , group , type : :int32 )
6566 end
6667
67- def feature_names = ( feature_names )
68+ def feature_name = ( feature_names )
6869 @feature_names = feature_names
6970 c_feature_names = ::FFI ::MemoryPointer . new ( :pointer , feature_names . size )
70- c_feature_names . write_array_of_pointer ( feature_names . map { |v | ::FFI ::MemoryPointer . from_string ( v ) } )
71+ # keep reference to string pointers
72+ str_ptrs = feature_names . map { |v | ::FFI ::MemoryPointer . from_string ( v ) }
73+ c_feature_names . write_array_of_pointer ( str_ptrs )
7174 check_result FFI . LGBM_DatasetSetFeatureNames ( handle_pointer , c_feature_names , feature_names . size )
7275 end
76+ alias_method :feature_names= , :feature_name=
7377
7478 # TODO only update reference if not in chain
7579 def reference = ( reference )
@@ -106,12 +110,7 @@ def subset(used_indices, params: nil)
106110 end
107111
108112 def handle_pointer
109- @handle . read_pointer
110- end
111-
112- def self . finalize ( addr )
113- # must use proc instead of stabby lambda
114- proc { FFI . LGBM_DatasetFree ( ::FFI ::Pointer . new ( :pointer , addr ) ) }
113+ @handle
115114 end
116115
117116 private
@@ -127,25 +126,33 @@ def construct
127126 end
128127 set_verbosity ( params )
129128
130- @ handle = ::FFI ::MemoryPointer . new ( :pointer )
129+ handle = ::FFI ::MemoryPointer . new ( :pointer )
131130 parameters = params_str ( params )
132131 reference = @reference . handle_pointer if @reference
133132 if used_indices
134133 used_row_indices = ::FFI ::MemoryPointer . new ( :int32 , used_indices . count )
135134 used_row_indices . write_array_of_int32 ( used_indices )
136- check_result FFI . LGBM_DatasetGetSubset ( reference , used_row_indices , used_indices . count , parameters , @ handle)
135+ check_result FFI . LGBM_DatasetGetSubset ( reference , used_row_indices , used_indices . count , parameters , handle )
137136 elsif data . is_a? ( String )
138- check_result FFI . LGBM_DatasetCreateFromFile ( data , parameters , reference , @ handle)
137+ check_result FFI . LGBM_DatasetCreateFromFile ( data , parameters , reference , handle )
139138 else
140139 if matrix? ( data )
141140 nrow = data . row_count
142141 ncol = data . column_count
143142 flat_data = data . to_a . flatten
144143 elsif daru? ( data )
144+ if @feature_name == "auto"
145+ @feature_name = data . vectors . to_a
146+ end
145147 nrow , ncol = data . shape
146148 flat_data = data . map_rows ( &:to_a ) . flatten
147- elsif numo? ( data ) || rover? ( data )
148- data = data . to_numo if rover? ( data )
149+ elsif numo? ( data )
150+ nrow , ncol = data . shape
151+ elsif rover? ( data )
152+ if @feature_name == "auto"
153+ @feature_name = data . keys
154+ end
155+ data = data . to_numo
149156 nrow , ncol = data . shape
150157 else
151158 nrow = data . count
@@ -161,14 +168,18 @@ def construct
161168 c_data . write_array_of_double ( flat_data )
162169 end
163170
164- check_result FFI . LGBM_DatasetCreateFromMat ( c_data , 1 , nrow , ncol , 1 , parameters , reference , @handle )
171+ check_result FFI . LGBM_DatasetCreateFromMat ( c_data , 1 , nrow , ncol , 1 , parameters , reference , handle )
172+ end
173+ if used_indices
174+ @handle = handle . read_pointer
175+ else
176+ @handle = ::FFI ::AutoPointer . new ( handle . read_pointer , FFI . method ( :LGBM_DatasetFree ) )
165177 end
166- ObjectSpace . define_finalizer ( @handle , self . class . finalize ( handle_pointer . to_i ) ) unless used_indices
167178
168179 self . label = @label if @label
169180 self . weight = @weight if @weight
170181 self . group = @group if @group
171- self . feature_names = @feature_names if @feature_names
182+ self . feature_name = @feature_name if @feature_name && @feature_name != "auto"
172183 end
173184
174185 def dump_text ( filename )
0 commit comments