{ "id": "1712.05440", "version": "v1", "published": "2017-12-14T20:31:29.000Z", "updated": "2017-12-14T20:31:29.000Z", "title": "Nonparametric Neural Networks", "authors": [ "George Philipp", "Jaime G. Carbonell" ], "comment": "ICLR 2017", "categories": [ "cs.LG", "cs.GT" ], "abstract": "Automatically determining the optimal size of a neural network for a given task without prior information currently requires an expensive global search and training many networks from scratch. In this paper, we address the problem of automatically finding a good network size during a single training cycle. We introduce *nonparametric neural networks*, a non-probabilistic framework for conducting optimization over all possible network sizes and prove its soundness when network growth is limited via an L_p penalty. We train networks under this framework by continuously adding new units while eliminating redundant units via an L_2 penalty. We employ a novel optimization algorithm, which we term *adaptive radial-angular gradient descent* or *AdaRad*, and obtain promising results.", "revisions": [ { "version": "v1", "updated": "2017-12-14T20:31:29.000Z" } ], "analyses": { "keywords": [ "nonparametric neural networks", "novel optimization algorithm", "prior information", "eliminating redundant units", "expensive global search" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }