{ "id": "1802.08009", "version": "v1", "published": "2018-02-22T12:27:40.000Z", "updated": "2018-02-22T12:27:40.000Z", "title": "Iterate averaging as regularization for stochastic gradient descent", "authors": [ "Gergely Neu", "Lorenzo Rosasco" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We propose and analyze a variant of the classic Polyak-Ruppert averaging scheme, broadly used in stochastic gradient methods. Rather than a uniform average of the iterates, we consider a weighted average, with weights decaying in a geometric fashion. In the context of linear least squares regression, we show that this averaging scheme has a the same regularizing effect, and indeed is asymptotically equivalent, to ridge regression. In particular, we derive finite-sample bounds for the proposed approach that match the best known results for regularized stochastic gradient methods.", "revisions": [ { "version": "v1", "updated": "2018-02-22T12:27:40.000Z" } ], "analyses": { "keywords": [ "stochastic gradient descent", "iterate averaging", "regularization", "regularized stochastic gradient methods", "classic polyak-ruppert averaging scheme" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }