 abstract = {First-order stochastic methods are the state-of-the-art in large-scale machine learning optimization owing to efficient per-iteration complexity. Second-order methods, while able to provide faster convergence, have been much less explored due to the high cost of computing the second-order information. In this paper we develop second-order stochastic methods for optimization problems in machine learning that match the per-iteration cost of gradient based methods, and in certain settings improve upon the overall running time over popular first-order methods. Furthermore, our algorithm has the desirable property of being implementable in time linear in the sparsity of the input data.},
 author = {Broderick, Tamara and Giordano, Ryan and Meager, Rachael},
 date = {2021-11-03},
 eprint = {2011.14999},
-eprinttype = {arxiv},
+eprinttype = {arXiv},
 url = {https://arxiv.org/abs/2011.14999},
 abstract = {We propose a method to assess the sensitivity of econometric analyses to the removal of a small fraction of the data. Manually checking the influence of all possible small subsets is computationally infeasible, so we provide an approximation to find the most influential subset. Our metric, the "Approximate Maximum Influence Perturbation," is automatically computable for common methods including (but not limited to) OLS, IV, MLE, GMM, and variational Bayes. We provide finite-sample error bounds on approximation performance. At minimal extra cost, we provide an exact finite-sample lower bound on sensitivity. We find that sensitivity is driven by a signal-to-noise ratio in the inference problem, is not reflected in standard errors, does not disappear asymptotically, and is not due to misspecification. While some empirical applications are robust, results of several economics papers can be overturned by removing less than 1\% of the sample.},
 author = {Watson, Lauren and Kujawa, Zeno and Andreeva, Rayna and Yang, Hao-Tsung and Elahi, Tariq and Sarkar, Rik},
 date = {2023-11-09},
 eprint = {2311.05346},
-eprinttype = {arxiv},
+eprinttype = {arXiv},
 eprintclass = {cs},
 doi = {10.48550/arXiv.2311.05346},
 url = {https://arxiv.org/abs/2311.05346},
 urldate = {2023-12-07},
 abstract = {Data valuation has found various applications in machine learning, such as data filtering, efficient learning and incentives for data sharing. The most popular current approach to data valuation is the Shapley value. While popular for its various applications, Shapley value is computationally expensive even to approximate, as it requires repeated iterations of training models on different subsets of data. In this paper we show that the Shapley value of data points can be approximated more efficiently by leveraging the structural properties of machine learning problems. We derive convergence guarantees on the accuracy of the approximate Shapley value for different learning settings including Stochastic Gradient Descent with convex and non-convex loss functions. Our analysis suggests that in fact models trained on small subsets are more important in the context of data valuation. Based on this idea, we describe \$\textbackslash delta\$-Shapley -- a strategy of only using small subsets for the approximation. Experiments show that this approach preserves approximate value and rank of data, while achieving speedup of up to 9.9x. In pre-trained networks the approach is found to bring more efficiency in terms of accurate evaluation using small subsets.},
-pubstate = {preprint}
+pubstate = {prepublished}
 }

 @inproceedings{wu_davinz_2022,
@@ -528,7 +528,7 @@ @inproceedings{wu_davinz_2022

 @inproceedings{yan_if_2021,
 title = {If {{You Like Shapley Then You}}’ll {{Love}} the {{Core}}},
-booktitle = {Proceedings of the 35th {{AAAI Conference}} on {{Artificial Intelligence}}, 2021},
+booktitle = {Proceedings of the 35th {{AAAI Conference}} on {{Artificial Intelligence}}},
 author = {Yan, Tom and Procaccia, Ariel D.},
 date = {2021-05-18},
 volume = {6},
@@ -543,3 +543,15 @@ @inproceedings{yan_if_2021
 langid = {english},
 keywords = {notion}
 }
+
+@inproceedings{zaheer_deep_2017,
+title = {Deep {{Sets}}},
+booktitle = {Advances in {{Neural Information Processing Systems}}},
+author = {Zaheer, Manzil and Kottur, Satwik and Ravanbakhsh, Siamak and Poczos, Barnabas and Salakhutdinov, Russ R and Smola, Alexander J},
+abstract = {We study the problem of designing models for machine learning tasks defined on sets. In contrast to the traditional approach of operating on fixed dimensional vectors, we consider objective functions defined on sets and are invariant to permutations. Such problems are widespread, ranging from the estimation of population statistics, to anomaly detection in piezometer data of embankment dams, to cosmology. Our main theorem characterizes the permutation invariant objective functions and provides a family of functions to which any permutation invariant objective function must belong. This family of functions has a special structure which enables us to design a deep network architecture that can operate on sets and which can be deployed on a variety of scenarios including both unsupervised and supervised learning tasks. We demonstrate the applicability of our method on population statistic estimation, point cloud classification, set expansion, and outlier detection.}