-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmart.py
More file actions
executable file
·99 lines (71 loc) · 3.01 KB
/
mart.py
File metadata and controls
executable file
·99 lines (71 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#########################################
# #
# Group 24 #
# (2018) #
# Vrije Universiteit #
# LambdaMART #
# #
#########################################
# Credits pyltr
#
# source: https://github.com/jma127/pyltr
#
# Copyright (c) 2015, Jerry Ma
# All rights reserved.
import pyltr
import matplotlib.pyplot as plt
import pandas as pd
# Ranking metric shared by the whole script: it is handed to the validation
# monitor, to the LambdaMART model, and used for the final evaluation printout.
# Constructed as NDCG with k=10.
metric = pyltr.metrics.NDCG(k=10)
# Every column name listed for the cleaned training CSV (kept for reference).
all_features = [
    "srch_id",
    "site_id",
    "visitor_location_country_id",
    "visitor_hist_starrating",
    "visitor_hist_adr_usd",
    "prop_country_id",
    "prop_id",
    "prop_starrating",
    "prop_review_score",
    "prop_brand_bool",
    "prop_location_score1",
    "prop_location_score2",
    "prop_log_historical_price",
    "position",
    "price_usd",
    "promotion_flag",
    "srch_destination_id",
    "srch_length_of_stay",
    "srch_booking_window",
    "srch_adults_count",
    "srch_children_count",
    "srch_room_count",
    "srch_saturday_night_bool",
    "srch_query_affinity_score",
    "orig_destination_distance",
    "random_bool",
    "click_bool",
    "gross_bookings_usd",
    "booking_bool",
    "rate_sum",
    "inv_sum",
    "diff_mean",
    "rate_abs",
    "inv_abs",
]

# Subset of columns actually fed to the model as features.
col = [
    "rate_sum",
    "inv_sum",
    "prop_starrating",
    "prop_review_score",
    "prop_brand_bool",
    "visitor_hist_starrating",
    "visitor_hist_adr_usd",
    "prop_country_id",
    "prop_location_score1",
    "prop_location_score2",
    "prop_log_historical_price",
    "srch_destination_id",
    "srch_length_of_stay",
    "srch_booking_window",
    "srch_adults_count",
    "srch_children_count",
    "srch_room_count",
]

# Input CSV and the number of rows loaded into each of the three splits.
filename = "data/clean_train.csv"
nrows = 20000
def data_sets(filename, col, nrows):
    """Load three consecutive, non-overlapping splits from a CSV file.

    The first two data rows of the file are always skipped (as in the
    original loader); after that the file is cut into a training, a
    validation and an evaluation split of up to ``nrows`` rows each, in
    file order.

    For every split the relevance label is ``click_bool + booking_bool``
    computed from that split's own rows, the feature matrix is the ``col``
    columns, and the query ids are the ``srch_id`` column.

    Args:
        filename: Path of the CSV file; it must contain the columns
            ``click_bool``, ``booking_bool``, ``srch_id`` and all of ``col``.
        col: List of feature column names to select.
        nrows: Maximum number of rows per split.

    Returns:
        A 9-tuple ``(Ty, TX, Tqids, Vy, VX, Vqids, Ey, EX, Eqids)`` holding
        labels, features and query ids for the training (T), validation (V)
        and evaluation (E) splits.
    """
    # Data rows 1 and 2 are discarded before the first split starts.
    leading_skip = 2

    def load_split(split_index):
        # Row 0 is the header, so skipping starts at row 1. Each later split
        # starts right after the previous one, which keeps the splits
        # disjoint (the old offsets re-read the last training row as the
        # first validation row).
        first_row = 1 + leading_skip + split_index * nrows
        frame = pd.read_csv(
            filename, skiprows=range(1, first_row), nrows=nrows)
        # Labels must come from the same frame as the features; mixing in
        # the training frame's bookings would mislabel the other splits.
        relevance = frame['click_bool'] + frame['booking_bool']
        return relevance, frame[col], frame['srch_id']

    Ty, TX, Tqids = load_split(0)
    Vy, VX, Vqids = load_split(1)
    Ey, EX, Eqids = load_split(2)
    return Ty, TX, Tqids, Vy, VX, Vqids, Ey, EX, Eqids
# Load the training (T), validation (V) and evaluation (E) splits.
# The evaluation features are bound to ``EX`` (not ``Ex``) so that the
# fillna and predict calls below refer to a defined name.
Ty, TX, Tqids, Vy, VX, Vqids, Ey, EX, Eqids = data_sets(filename, col, nrows)

# Replace missing feature values with 0 in every split.
VX = VX.fillna(0)
TX = TX.fillna(0)
EX = EX.fillna(0)

# Only needed if you want to perform validation (early stopping & trimming)
monitor = pyltr.models.monitors.ValidationMonitor(
    VX, Vy, Vqids, metric=metric, stop_after=250)

model = pyltr.models.LambdaMART(
    metric=metric,
    n_estimators=1000,
    learning_rate=0.02,
    max_features=0.5,
    query_subsample=0.5,
    max_leaf_nodes=10,
    min_samples_leaf=64,
    verbose=1,
)
model.fit(TX, Ty, Tqids, monitor=monitor)

# Score the held-out evaluation split and compare against a random ordering.
Epred = model.predict(EX)
print('Random ranking:', metric.calc_mean_random(Eqids, Ey))
print('Our model:', metric.calc_mean(Eqids, Ey, Epred))