@@ -31,7 +31,7 @@ from sklearn.model_selection import train_test_split
31
31
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
32
32
33
33
from deepctr.models import DeepFM
34
- from deepctr.inputs import SparseFeat, DenseFeat,get_fixlen_feature_names
34
+ from deepctr.inputs import SparseFeat, DenseFeat, get_feature_names
35
35
36
36
if __name__ == "__main__":
37
37
data = pd.read_csv('./criteo_sample.txt')
@@ -59,14 +59,13 @@ if __name__ == "__main__":
59
59
dnn_feature_columns = fixlen_feature_columns
60
60
linear_feature_columns = fixlen_feature_columns
61
61
62
- fixlen_feature_names = get_fixlen_feature_names (linear_feature_columns + dnn_feature_columns)
62
+ feature_names = get_feature_names (linear_feature_columns + dnn_feature_columns)
63
63
64
64
# 3.generate input data for model
65
65
66
66
train, test = train_test_split(data, test_size = 0.2 )
67
- train_model_input = [train[name] for name in fixlen_feature_names]
68
-
69
- test_model_input = [test[name] for name in fixlen_feature_names]
67
+ train_model_input = {name:train[name] for name in feature_names}
68
+ test_model_input = {name:test[name] for name in feature_names}
70
69
71
70
# 4.Define Model,train,predict and evaluate
72
71
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
@@ -91,7 +90,7 @@ from sklearn.model_selection import train_test_split
91
90
from sklearn.preprocessing import MinMaxScaler
92
91
93
92
from deepctr.models import DeepFM
94
- from deepctr.inputs import SparseFeat, DenseFeat,get_fixlen_feature_names
93
+ from deepctr.inputs import SparseFeat, DenseFeat,get_feature_names
95
94
96
95
if __name__ == "__main__":
97
96
data = pd.read_csv('./criteo_sample.txt')
@@ -115,14 +114,14 @@ if __name__ == "__main__":
115
114
116
115
linear_feature_columns = fixlen_feature_columns
117
116
dnn_feature_columns = fixlen_feature_columns
118
- fixlen_feature_names = get_fixlen_feature_names (linear_feature_columns + dnn_feature_columns, )
117
+ feature_names = get_feature_names (linear_feature_columns + dnn_feature_columns, )
119
118
120
119
# 3.generate input data for model
121
120
122
121
train, test = train_test_split(data, test_size = 0.2 )
123
- train_model_input = [train[name] for name in fixlen_feature_names]
124
122
125
- test_model_input = [test[name] for name in fixlen_feature_names]
123
+ train_model_input = {name:train[name] for name in feature_names}
124
+ test_model_input = {name:test[name] for name in feature_names}
126
125
127
126
128
127
# 4.Define Model,train,predict and evaluate
@@ -156,7 +155,7 @@ from sklearn.model_selection import train_test_split
156
155
from sklearn.preprocessing import LabelEncoder
157
156
158
157
from deepctr.models import DeepFM
159
- from deepctr.inputs import SparseFeat,get_fixlen_feature_names
158
+ from deepctr.inputs import SparseFeat,get_feature_names
160
159
161
160
if __name__ == "__main__":
162
161
@@ -174,12 +173,13 @@ if __name__ == "__main__":
174
173
for feat in sparse_features]
175
174
linear_feature_columns = fixlen_feature_columns
176
175
dnn_feature_columns = fixlen_feature_columns
177
- fixlen_feature_names = get_fixlen_feature_names (linear_feature_columns + dnn_feature_columns)
176
+ feature_names = get_feature_names (linear_feature_columns + dnn_feature_columns)
178
177
179
178
# 3.generate input data for model
180
179
train, test = train_test_split(data, test_size = 0.2 )
181
- train_model_input = [train[name].values for name in fixlen_feature_names]
182
- test_model_input = [test[name].values for name in fixlen_feature_names]
180
+ train_model_input = {name:train[name].values for name in feature_names}
181
+ test_model_input = {name:test[name].values for name in feature_names}
182
+
183
183
# 4.Define Model,train,predict and evaluate
184
184
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
185
185
model.compile("adam", "mse", metrics=['mse'], )
@@ -228,7 +228,7 @@ from sklearn.preprocessing import LabelEncoder
228
228
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
229
229
230
230
from deepctr.models import DeepFM
231
- from deepctr.inputs import SparseFeat, VarLenSparseFeat,get_fixlen_feature_names,get_varlen_feature_names
231
+ from deepctr.inputs import SparseFeat, VarLenSparseFeat,get_feature_names
232
232
233
233
234
234
def split (x ):
@@ -239,49 +239,49 @@ def split(x):
239
239
key2index[key] = len (key2index) + 1
240
240
return list (map (lambda x : key2index[x], key_ans))
241
241
242
+ if __name__ == "__main__":
243
+ data = pd.read_csv("./movielens_sample.txt")
244
+ sparse_features = ["movie_id", "user_id",
245
+ "gender", "age", "occupation", "zip", ]
246
+ target = [' rating' ]
242
247
243
- data = pd.read_csv(" ./movielens_sample.txt" )
244
- sparse_features = [" movie_id" , " user_id" ,
245
- " gender" , " age" , " occupation" , " zip" , ]
246
- target = [' rating' ]
248
+ # 1.Label Encoding for sparse features,and process sequence features
249
+ for feat in sparse_features:
250
+ lbe = LabelEncoder()
251
+ data[feat] = lbe.fit_transform(data[feat])
252
+ # preprocess the sequence feature
247
253
248
- # 1.Label Encoding for sparse features,and process sequence features
249
- for feat in sparse_features:
250
- lbe = LabelEncoder()
251
- data[feat] = lbe.fit_transform(data[feat])
252
- # preprocess the sequence feature
254
+ key2index = {}
255
+ genres_list = list (map (split, data[' genres' ].values))
256
+ genres_length = np.array(list (map (len , genres_list)))
257
+ max_len = max (genres_length)
258
+ # Notice : padding=`post`
259
+ genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', )
253
260
254
- key2index = {}
255
- genres_list = list (map (split, data[' genres' ].values))
256
- genres_length = np.array(list (map (len , genres_list)))
257
- max_len = max (genres_length)
258
- # Notice : padding=`post`
259
- genres_list = pad_sequences(genres_list, maxlen = max_len, padding = ' post' , )
261
+ # 2.count #unique features for each sparse field and generate feature config for sequence feature
260
262
261
- # 2.count #unique features for each sparse field and generate feature config for sequence feature
263
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
264
+ for feat in sparse_features]
265
+ varlen_feature_columns = [VarLenSparseFeat(' genres' , len (
266
+ key2index) + 1 , max_len, ' mean' )] # Notice : value 0 is for padding for sequence input feature
262
267
263
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique())
264
- for feat in sparse_features]
265
- varlen_feature_columns = [VarLenSparseFeat(' genres' , len (
266
- key2index) + 1 , max_len, ' mean' )] # Notice : value 0 is for padding for sequence input feature
268
+ linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
269
+ dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
267
270
268
- linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
269
- dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
270
- fixlen_feature_names = get_fixlen_feature_names(linear_feature_columns + dnn_feature_columns)
271
- varlen_feature_names = get_varlen_feature_names(linear_feature_columns+ dnn_feature_columns)
271
+ feature_names = get_feature_names(linear_feature_columns+ dnn_feature_columns)
272
272
273
273
274
- # 3.generate input data for model
275
- fixlen_input = [data[name].values for name in fixlen_feature_names]
276
- varlen_input = [genres_list]# varlen_feature_names[0]
277
- model_input = fixlen_input + varlen_input # make sure the order is right
274
+ # 3.generate input data for model
275
+ model_input = {name: data[name] for name in feature_names}
276
+ model_input["genres"] = genres_list
278
277
279
- # 4.Define Model,compile and train
280
- model = DeepFM(linear_feature_columns,dnn_feature_columns,task = ' regression' )
281
278
282
- model.compile(" adam" , " mse" , metrics = [' mse' ], )
283
- history = model.fit(model_input, data[target].values,
284
- batch_size = 256 , epochs = 10 , verbose = 2 , validation_split = 0.2 , )
279
+ # 4.Define Model,compile and train
280
+ model = DeepFM(linear_feature_columns,dnn_feature_columns,task = ' regression' )
281
+
282
+ model.compile(" adam" , " mse" , metrics = [' mse' ], )
283
+ history = model.fit(model_input, data[target].values,
284
+ batch_size = 256 , epochs = 10 , verbose = 2 , validation_split = 0.2 , )
285
285
286
286
```
287
287
@@ -293,44 +293,43 @@ import pandas as pd
293
293
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
294
294
295
295
from deepctr.models import DeepFM
296
- from deepctr.inputs import SparseFeat, VarLenSparseFeat,get_fixlen_feature_names
297
-
298
- data = pd.read_csv(" ./movielens_sample.txt" )
299
- sparse_features = [" movie_id" , " user_id" ,
300
- " gender" , " age" , " occupation" , " zip" , ]
296
+ from deepctr.inputs import SparseFeat, VarLenSparseFeat,get_feature_names
301
297
302
- data[sparse_features] = data[sparse_features].astype(str )
303
- target = [' rating' ]
298
+ if __name__ == "__main__":
299
+ data = pd.read_csv("./movielens_sample.txt")
300
+ sparse_features = ["movie_id", "user_id",
301
+ "gender", "age", "occupation", "zip", ]
304
302
305
- # 1.Use hashing encoding on the fly for sparse features,and process sequence features
303
+ data[sparse_features] = data[sparse_features].astype(str )
304
+ target = [' rating' ]
306
305
307
- genres_list = list (map (lambda x : x.split(' |' ), data[' genres' ].values))
308
- genres_length = np.array(list (map (len , genres_list)))
309
- max_len = max (genres_length)
306
+ # 1.Use hashing encoding on the fly for sparse features,and process sequence features
310
307
311
- # Notice : padding=`post`
312
- genres_list = pad_sequences(genres_list, maxlen = max_len, padding = ' post' , dtype = str , value = 0 )
308
+ genres_list = list (map (lambda x : x.split(' |' ), data[' genres' ].values))
309
+ genres_length = np.array(list (map (len , genres_list)))
310
+ max_len = max (genres_length)
313
311
314
- # 2.set hashing space for each sparse field and generate feature config for sequence feature
312
+ # Notice : padding=`post`
313
+ genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=str, value=0)
315
314
316
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5 , use_hash = True , dtype = ' string' )
317
- for feat in sparse_features]
318
- varlen_feature_columns = [VarLenSparseFeat(' genres' , 100 , max_len, ' mean' , use_hash = True ,
319
- dtype = " string" )] # Notice : value 0 is for padding for sequence input feature
320
- linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
321
- dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
322
- feature_names = get_fixlen_feature_names(linear_feature_columns + dnn_feature_columns)
315
+ # 2.set hashing space for each sparse field and generate feature config for sequence feature
323
316
324
- # 3.generate input data for model
325
- fixlen_input = [data[name].values for name in feature_names]
326
- varlen_input = [genres_list]
317
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5 , use_hash = True , dtype = ' string' )
318
+ for feat in sparse_features]
319
+ varlen_feature_columns = [VarLenSparseFeat(' genres' , 100 , max_len, ' mean' , use_hash = True ,
320
+ dtype = " string" )] # Notice : value 0 is for padding for sequence input feature
321
+ linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
322
+ dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
323
+ feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
327
324
328
- model_input = fixlen_input + varlen_input # make sure the order is right
325
+ # 3.generate input data for model
326
+ model_input = {name:data[name] for name in feature_names}
327
+ model_input['genres'] = genres_list
329
328
330
- # 4.Define Model,compile and train
331
- model = DeepFM(linear_feature_columns,dnn_feature_columns, task = ' regression' )
329
+ # 4.Define Model,compile and train
330
+ model = DeepFM(linear_feature_columns,dnn_feature_columns, task = ' regression' )
332
331
333
- model.compile(" adam" , " mse" , metrics = [' mse' ], )
334
- history = model.fit(model_input, data[target].values,
335
- batch_size = 256 , epochs = 10 , verbose = 2 , validation_split = 0.2 , )
332
+ model.compile(" adam" , " mse" , metrics = [' mse' ], )
333
+ history = model.fit(model_input, data[target].values,
334
+ batch_size = 256 , epochs = 10 , verbose = 2 , validation_split = 0.2 , )
336
335
```
0 commit comments