{ "id": "2010.12711", "version": "v1", "published": "2020-10-23T23:41:03.000Z", "updated": "2020-10-23T23:41:03.000Z", "title": "On Convergence and Generalization of Dropout Training", "authors": [ "Poorya Mianjy", "Raman Arora" ], "journal": "In Proceedings of Advances in Neural Information Processing Systems (NeurIPS), 2020", "categories": [ "cs.LG", "stat.ML" ], "abstract": "We study dropout in two-layer neural networks with rectified linear unit (ReLU) activations. Under mild overparametrization and assuming that the limiting kernel can separate the data distribution with a positive margin, we show that dropout training with logistic loss achieves $\\epsilon$-suboptimality in test error in $O(1/\\epsilon)$ iterations.", "revisions": [ { "version": "v1", "updated": "2020-10-23T23:41:03.000Z" } ], "analyses": { "keywords": [ "dropout training", "convergence", "generalization", "two-layer neural networks", "logistic loss achieves" ], "tags": [ "journal article" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }