 import re
 import pytest
 import numpy as np
+import scipy
 from scipy.optimize import check_grad, approx_fprime
 from six.moves import xrange
-from sklearn.metrics import pairwise_distances
+from sklearn.metrics import pairwise_distances, euclidean_distances
 from sklearn.datasets import (load_iris, make_classification, make_regression,
                               make_spd_matrix)
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
@@ -304,25 +305,15 @@ def test_loss_grad_lbfgs(self):
     lmnn.components_ = np.eye(n_components)
 
     target_neighbors = lmnn._select_targets(X, label_inds)
-    impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
 
     # sum outer products
     dfG = _sum_outer_products(X, target_neighbors.flatten(),
                               np.repeat(np.arange(X.shape[0]), k))
-    df = np.zeros_like(dfG)
-
-    # storage
-    a1 = [None] * k
-    a2 = [None] * k
-    for nn_idx in xrange(k):
-      a1[nn_idx] = np.array([])
-      a2[nn_idx] = np.array([])
 
     # initialize L
     def loss_grad(flat_L):
-      return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG, impostors,
-                             1, k, reg, target_neighbors, df.copy(),
-                             list(a1), list(a2))
+      return lmnn._loss_grad(X, flat_L.reshape(-1, X.shape[1]), dfG,
+                             k, reg, target_neighbors, label_inds)
 
     def fun(x):
       return loss_grad(x)[1]
@@ -366,6 +357,154 @@ def test_deprecation_use_pca(self):
     assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y)
 
 
+def test_loss_func(capsys):
+  """Test the loss function (and its gradient) on a simple example,
+  by comparing the results of the actual metric-learn implementation
+  with a very simple (but nonperformant) reference implementation"""
+
+  # toy dataset to use
+  X, y = make_classification(n_samples=10, n_classes=2,
+                             n_features=6,
+                             n_redundant=0, shuffle=True,
+                             scale=[1, 1, 20, 20, 20, 20], random_state=42)
+
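+  # hinge(a) returns max(a, 0) together with a 0/1 indicator telling
+  # whether the constraint is active (a > 0)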
+  def hinge(a):
+    if a > 0:
+      return a, 1
+    else:
+      return 0, 0
+
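+  # naive O(n**2 * k) version of the LMNN objective: a (1 - reg)-weighted
+  # pull term over target neighbor pairs (i, j), plus a reg-weighted push
+  # term over points l of a different class entering the margin of (i, j)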
+  def loss_fn(L, X, y, target_neighbors, reg):
+    L = L.reshape(-1, X.shape[1])
+    Lx = np.dot(X, L.T)
+    loss = 0
+    total_active = 0
+    grad = np.zeros_like(L)
+    for i in range(X.shape[0]):
+      for j in target_neighbors[i]:
+        loss += (1 - reg) * np.sum((Lx[i] - Lx[j]) ** 2)
+        grad += (1 - reg) * np.outer(Lx[i] - Lx[j], X[i] - X[j])
+        for l in range(X.shape[0]):
+          if y[i] != y[l]:
+            hin, active = hinge(1 + np.sum((Lx[i] - Lx[j])**2) -
+                                np.sum((Lx[i] - Lx[l])**2))
+            total_active += active
+            if active:
+              loss += reg * hin
+              grad += (reg * (np.outer(Lx[i] - Lx[j], X[i] - X[j]) -
+                              np.outer(Lx[i] - Lx[l], X[i] - X[l])))
+    grad = 2 * grad
+    return grad, loss, total_active
+
+  # we check that the gradient we have computed in the non-performant
+  # implementation is indeed the true gradient on a toy example:
+
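+  # select, for each sample, its k nearest neighbors among the samples
+  # sharing its label (the "target neighbors" of LMNN)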
+  def _select_targets(X, y, k):
+    target_neighbors = np.empty((X.shape[0], k), dtype=int)
+    for label in np.unique(y):
+      inds, = np.nonzero(y == label)
+      dd = euclidean_distances(X[inds], squared=True)
+      np.fill_diagonal(dd, np.inf)
+      nn = np.argsort(dd)[..., :k]
+      target_neighbors[inds] = inds[nn]
+    return target_neighbors
+
+  target_neighbors = _select_targets(X, y, 2)
+  regularization = 0.5
+  n_features = X.shape[1]
+  x0 = np.random.randn(1, n_features)
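+  # (x0 is a random 1 x n_features linear map at which we check the gradient)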
+
+  def loss(x0):
+    return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors,
+                   regularization)[1]
+
+  def grad(x0):
+    return loss_fn(x0.reshape(-1, X.shape[1]), X, y, target_neighbors,
+                   regularization)[0].ravel()
+
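+  # check_grad compares the analytical gradient with a finite-difference
+  # approximation of the loss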
+  scipy.optimize.check_grad(loss, grad, x0.ravel())
+
+  class LMNN_with_callback(LMNN):
+    """We will use a callback to get the gradient (see later)"""
+
+    def __init__(self, callback, *args, **kwargs):
+      self.callback = callback
+      super(LMNN_with_callback, self).__init__(*args, **kwargs)
+
+    def _loss_grad(self, *args, **kwargs):
+      grad, objective, total_active = (
+          super(LMNN_with_callback, self)._loss_grad(*args, **kwargs))
+      self.callback.append(grad)
+      return grad, objective, total_active
+
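+  # same estimator, but with _loss_grad replaced by the naive loss_fn above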
+  class LMNN_nonperformant(LMNN_with_callback):
+
+    def fit(self, X, y):
+      self.y = y
+      return super(LMNN_nonperformant, self).fit(X, y)
+
+    def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
+      grad, loss, total_active = loss_fn(L.ravel(), X, self.y,
+                                         target_neighbors,
+                                         self.regularization)
+      self.callback.append(grad)
+      return grad, loss, total_active
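+  # fit both versions with identical settings, recording every gradient
+  # through the callback lists mem1 and mem2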
+  mem1, mem2 = [], []
+  lmnn_perf = LMNN_with_callback(verbose=True, random_state=42,
+                                 init='identity', max_iter=30, callback=mem1)
+  lmnn_nonperf = LMNN_nonperformant(verbose=True, random_state=42,
+                                    init='identity', max_iter=30,
+                                    callback=mem2)
+  objectives, obj_diffs, learn_rate, total_active = (dict(), dict(), dict(),
+                                                     dict())
+  for algo, name in zip([lmnn_perf, lmnn_nonperf], ['perf', 'nonperf']):
+    algo.fit(X, y)
+    out, _ = capsys.readouterr()
+    lines = re.split("\n+", out)
+    # we get every variable that is printed from the algorithm in verbose
+    num = '(-?\d+\.?\d*(e[+-]\d+)?)'
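+    # a verbose line reads: "<it> <obj> <obj_diff> <n_active> <learn_rate>"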
+    strings = [re.search("\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})"
+                         .format(num, num, num), s) for s in lines]
+    objectives[name] = [float(match.group(1)) for match in strings
+                        if match is not None]
+    obj_diffs[name] = [float(match.group(3)) for match in strings
+                       if match is not None]
+    total_active[name] = [float(match.group(5)) for match in strings
+                          if match is not None]
+    learn_rate[name] = [float(match.group(6)) for match in strings
+                        if match is not None]
+    # we ensure that we actually did at least 10 iterations
+    assert len(strings) >= 10
+    # we ensure that we have some active constraints
+    # (that's the case we want to test)
+    assert total_active[name][0] >= 2
+    # we remove the last element because it can be equal to the penultimate
+    # if the last gradient update is null
+  for i in range(len(mem1)):
+    np.testing.assert_allclose(lmnn_perf.callback[i],
+                               lmnn_nonperf.callback[i],
+                               err_msg='Gradient different at position '
+                                       '{}'.format(i))
+  np.testing.assert_allclose(objectives['perf'], objectives['nonperf'])
+  np.testing.assert_allclose(obj_diffs['perf'], obj_diffs['nonperf'])
+  np.testing.assert_allclose(total_active['perf'], total_active['nonperf'])
+  np.testing.assert_allclose(learn_rate['perf'], learn_rate['nonperf'])
+
+
 @pytest.mark.parametrize('X, y, loss', [(np.array([[0], [1], [2], [3]]),
                                           [1, 1, 0, 0], 3.0),
                                          (np.array([[0], [1], [2], [3]]),
@@ -386,7 +512,7 @@ def test_toy_ex_lmnn(X, y, loss):
   lmnn.components_ = np.eye(n_components)
 
   target_neighbors = lmnn._select_targets(X, label_inds)
-  impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds)
+  impostors = lmnn._find_impostors(target_neighbors[:, -1], X, label_inds, L)
 
   # sum outer products
   dfG = _sum_outer_products(X, target_neighbors.flatten(),
@@ -401,9 +527,8 @@ def test_toy_ex_lmnn(X, y, loss):
     a2[nn_idx] = np.array([])
 
   # assert that the loss equals the one computed by hand
-  assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, impostors, 1, k,
-                         reg, target_neighbors, df, a1, a2)[1] == loss
-
+  assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, k,
+                         reg, target_neighbors, label_inds)[1] == loss
 
 def test_convergence_simple_example(capsys):
   # LMNN should converge on this simple example, which it did not with