{ "id": "1711.00941", "version": "v1", "published": "2017-11-02T21:21:11.000Z", "updated": "2017-11-02T21:21:11.000Z", "title": "Deep Active Learning over the Long Tail", "authors": [ "Yonatan Geifman", "Ran El-Yaniv" ], "categories": [ "cs.LG" ], "abstract": "This paper is concerned with pool-based active learning for deep neural networks. Motivated by coreset dataset compression ideas, we present a novel active learning algorithm that queries consecutive points from the pool using farthest-first traversals in the space of neural activation over a representation layer. We show consistent and overwhelming improvement in sample complexity over passive learning (random sampling) for three datasets: MNIST, CIFAR-10, and CIFAR-100. In addition, our algorithm outperforms the traditional uncertainty sampling technique (obtained using softmax activations), and we identify cases where uncertainty sampling is only slightly better than random sampling.", "revisions": [ { "version": "v1", "updated": "2017-11-02T21:21:11.000Z" } ], "analyses": { "keywords": [ "deep active learning", "long tail", "coreset dataset compression ideas", "deep neural networks", "traditional uncertainty sampling technique" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }