{
  "id": "1905.02249",
  "version": "v1",
  "published": "2019-05-06T19:56:03.000Z",
  "updated": "2019-05-06T19:56:03.000Z",
  "title": "MixMatch: A Holistic Approach to Semi-Supervised Learning",
  "authors": [
    "David Berthelot",
    "Nicholas Carlini",
    "Ian Goodfellow",
    "Nicolas Papernot",
    "Avital Oliver",
    "Colin Raffel"
  ],
  "categories": [
    "cs.LG",
    "cs.AI",
    "cs.CV",
    "stat.ML"
  ],
  "abstract": "Semi-supervised learning has proven to be a powerful paradigm for leveraging unlabeled data to mitigate the reliance on large labeled datasets. In this work, we unify the current dominant approaches for semi-supervised learning to produce a new algorithm, MixMatch, that works by guessing low-entropy labels for data-augmented unlabeled examples and mixing labeled and unlabeled data using MixUp. We show that MixMatch obtains state-of-the-art results by a large margin across many datasets and labeled data amounts. For example, on CIFAR-10 with 250 labels, we reduce error rate by a factor of 4 (from 38% to 11%) and by a factor of 2 on STL-10. We also demonstrate how MixMatch can help achieve a dramatically better accuracy-privacy trade-off for differential privacy. Finally, we perform an ablation study to tease apart which components of MixMatch are most important for its success.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2019-05-06T19:56:03.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "semi-supervised learning",
      "holistic approach",
      "unlabeled data",
      "reduce error rate",
      "dramatically better accuracy-privacy trade-off"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}