{
  "id": "1212.1824",
  "version": "v2",
  "published": "2012-12-08T18:22:42.000Z",
  "updated": "2012-12-28T10:58:48.000Z",
  "title": "Stochastic Gradient Descent for Non-smooth Optimization: Convergence Results and Optimal Averaging Schemes",
  "authors": [
    "Ohad Shamir",
    "Tong Zhang"
  ],
  "comment": "To appear in ICML 2013",
  "categories": [
    "cs.LG",
    "math.OC",
    "stat.ML"
  ],
  "abstract": "Stochastic Gradient Descent (SGD) is one of the simplest and most popular stochastic optimization methods. While it has already been theoretically studied for decades, the classical analysis usually required non-trivial smoothness assumptions, which do not apply to many modern applications of SGD with non-smooth objective functions such as support vector machines. In this paper, we investigate the performance of SGD without such smoothness assumptions, as well as a running average scheme to convert the SGD iterates to a solution with optimal optimization accuracy. In this framework, we prove that after T rounds, the suboptimality of the last SGD iterate scales as O(log(T)/\\sqrt{T}) for non-smooth convex objective functions, and O(log(T)/T) in the non-smooth strongly convex case. To the best of our knowledge, these are the first bounds of this kind, and almost match the minimax-optimal rates obtainable by appropriate averaging schemes. We also propose a new and simple averaging scheme, which not only attains optimal rates, but can also be easily computed on-the-fly (in contrast, the suffix averaging scheme proposed in Rakhlin et al. (2011) is not as simple to implement). Finally, we provide some experimental illustrations.",
  "revisions": [
    {
      "version": "v2",
      "updated": "2012-12-28T10:58:48.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "stochastic gradient descent",
      "optimal averaging schemes",
      "non-smooth optimization",
      "convergence results",
      "sgd iterate"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2012arXiv1212.1824S"
    }
  }
}