{ "id": "1711.00941", "version": "v1", "published": "2017-11-02T21:21:11.000Z", "updated": "2017-11-02T21:21:11.000Z", "title": "Deep Active Learning over the Long Tail", "authors": [ "Yonatan Geifman", "Ran El-Yaniv" ], "categories": [ "cs.LG" ], "abstract": "This paper is concerned with pool-based active learning for deep neural networks. Motivated by coreset dataset compression ideas, we present a novel active learning algorithm that queries consecutive points from the pool using farthest-first traversals in the space of neural activation over a representation layer. We show consistent and overwhelming improvement in sample complexity over passive learning (random sampling) for three datasets: MNIST, CIFAR-10, and CIFAR-100. In addition, our algorithm outperforms the traditional uncertainty sampling technique (obtained using softmax activations), and we identify cases where uncertainty sampling is only slightly better than random sampling.", "revisions": [ { "version": "v1", "updated": "2017-11-02T21:21:11.000Z" } ], "analyses": { "keywords": [ "deep active learning", "long tail", "coreset dataset compression ideas", "deep neural networks", "traditional uncertainty sampling technique" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }