@@ -40,13 +40,10 @@ def setup_class(cls):
40
40
cls .gamma_true = 0.6567
41
41
42
42
# Use the donations dataset (data/bgbb_donations.csv) for testing
43
- test_data = pd .read_csv ("data/bgbb_donations.csv" )
43
+ cls . data = pd .read_csv ("data/bgbb_donations.csv" )
44
44
45
- cls .data = test_data
46
- # cls.customer_id = test_data["customer_id"]
47
- # cls.frequency = test_data["frequency"]
48
- # cls.recency = test_data["recency"]
49
- # cls.T = test_data["T"]
45
+ # sample from full dataset for tests involving model fits
46
+ cls .sample_data = cls .data .sample (n = 1000 , random_state = 45 )
50
47
51
48
# take sample of all unique recency/frequency/T combinations to test predictive methods
52
49
test_customer_ids = [
@@ -74,8 +71,8 @@ def setup_class(cls):
74
71
11103 ,
75
72
]
76
73
77
- cls .sample_data = test_data .query ("customer_id.isin(@test_customer_ids)" )
78
- cls .sample_data_N = len (test_customer_ids )
74
+ cls .pred_data = cls . data .query ("customer_id.isin(@test_customer_ids)" )
75
+ cls .pred_data_N = len (test_customer_ids )
79
76
80
77
# Instantiate model with the full donations dataset (cls.data) for testing
81
78
cls .model = BetaGeoBetaBinomModel (cls .data )
@@ -278,13 +275,16 @@ def test_model_repr(self, custom_config):
278
275
@pytest .mark .parametrize (
279
276
"fit_method, rtol" ,
280
277
[
281
- ("mcmc" , 0.1 ),
278
+ (
279
+ "mcmc" ,
280
+ 0.3 ,
281
+ ), # higher rtol required for sample_data; within .1 tolerance for full dataset;
282
282
("map" , 0.2 ),
283
283
],
284
284
)
285
285
def test_model_convergence (self , fit_method , rtol , model_config ):
286
286
model = BetaGeoBetaBinomModel (
287
- data = self .data ,
287
+ data = self .sample_data ,
288
288
model_config = model_config ,
289
289
)
290
290
model .build_model ()
@@ -307,7 +307,7 @@ def test_model_convergence(self, fit_method, rtol, model_config):
307
307
)
308
308
309
309
def test_fit_result_without_fit (self , model_config ):
310
- model = BetaGeoBetaBinomModel (data = self .data , model_config = model_config )
310
+ model = BetaGeoBetaBinomModel (data = self .pred_data , model_config = model_config )
311
311
with pytest .raises (RuntimeError , match = "The model hasn't been fit yet" ):
312
312
model .fit_result
313
313
@@ -327,20 +327,20 @@ def test_expected_purchases(self, test_t):
327
327
true_purchases = (
328
328
self .lifetimes_model .conditional_expected_number_of_purchases_up_to_time (
329
329
m_periods_in_future = test_t ,
330
- frequency = self .sample_data ["frequency" ],
331
- recency = self .sample_data ["recency" ],
332
- n_periods = self .sample_data ["T" ],
330
+ frequency = self .pred_data ["frequency" ],
331
+ recency = self .pred_data ["recency" ],
332
+ n_periods = self .pred_data ["T" ],
333
333
)
334
334
)
335
335
336
336
# test parametrization with default data has different dims
337
337
est_num_purchases = self .model .expected_purchases (future_t = test_t )
338
338
assert est_num_purchases .shape == (self .chains , self .draws , self .N )
339
339
340
- data = self .sample_data .assign (future_t = test_t )
340
+ data = self .pred_data .assign (future_t = test_t )
341
341
est_num_purchases = self .model .expected_purchases (data )
342
342
343
- assert est_num_purchases .shape == (self .chains , self .draws , self .sample_data_N )
343
+ assert est_num_purchases .shape == (self .chains , self .draws , self .pred_data_N )
344
344
assert est_num_purchases .dims == ("chain" , "draw" , "customer_id" )
345
345
346
346
np .testing .assert_allclose (
@@ -398,33 +398,33 @@ def test_expected_purchases_new_customer(self):
398
398
def test_expected_probability_alive (self , test_t ):
399
399
true_prob_alive = self .lifetimes_model .conditional_probability_alive (
400
400
m_periods_in_future = test_t ,
401
- frequency = self .sample_data ["frequency" ],
402
- recency = self .sample_data ["recency" ],
403
- n_periods = self .sample_data ["T" ],
401
+ frequency = self .pred_data ["frequency" ],
402
+ recency = self .pred_data ["recency" ],
403
+ n_periods = self .pred_data ["T" ],
404
404
)
405
405
406
406
# test parametrization with default data has different dims
407
407
est_prob_alive = self .model .expected_probability_alive (future_t = test_t )
408
408
assert est_prob_alive .shape == (self .chains , self .draws , self .N )
409
409
410
- sample_data = self .sample_data .assign (future_t = test_t )
411
- est_prob_alive = self .model .expected_probability_alive (sample_data )
410
+ pred_data = self .pred_data .assign (future_t = test_t )
411
+ est_prob_alive = self .model .expected_probability_alive (pred_data )
412
412
413
- assert est_prob_alive .shape == (self .chains , self .draws , self .sample_data_N )
413
+ assert est_prob_alive .shape == (self .chains , self .draws , self .pred_data_N )
414
414
assert est_prob_alive .dims == ("chain" , "draw" , "customer_id" )
415
415
np .testing .assert_allclose (
416
416
true_prob_alive ,
417
417
est_prob_alive .mean (("chain" , "draw" )),
418
418
rtol = 0.01 ,
419
419
)
420
420
421
- alt_data = self .sample_data .assign (future_t = 7.5 )
421
+ alt_data = self .pred_data .assign (future_t = 7.5 )
422
422
est_prob_alive_t = self .model .expected_probability_alive (alt_data )
423
423
assert est_prob_alive .mean () > est_prob_alive_t .mean ()
424
424
425
425
def test_distribution_new_customer (self ) -> None :
426
426
mock_model = BetaGeoBetaBinomModel (
427
- data = self .data ,
427
+ data = self .sample_data ,
428
428
)
429
429
mock_model .build_model ()
430
430
mock_model .idata = az .from_dict (
@@ -444,7 +444,7 @@ def test_distribution_new_customer(self) -> None:
444
444
random_seed = rng
445
445
)
446
446
customer_rec_freq = mock_model .distribution_new_customer_recency_frequency (
447
- self .data , T = self .data ["T" ], random_seed = rng
447
+ self .sample_data , T = self .sample_data ["T" ], random_seed = rng
448
448
)
449
449
customer_rec = customer_rec_freq .sel (obs_var = "recency" )
450
450
customer_freq = customer_rec_freq .sel (obs_var = "frequency" )
@@ -463,7 +463,7 @@ def test_distribution_new_customer(self) -> None:
463
463
beta = self .beta_true ,
464
464
delta = self .delta_true ,
465
465
gamma = self .gamma_true ,
466
- T = self .data ["T" ],
466
+ T = self .sample_data ["T" ],
467
467
),
468
468
random_seed = rng ,
469
469
).T
0 commit comments