{
  "id": "2006.16495",
  "version": "v1",
  "published": "2020-06-30T02:59:35.000Z",
  "updated": "2020-06-30T02:59:35.000Z",
  "title": "Guarantees for Tuning the Step Size using a Learning-to-Learn Approach",
  "authors": [
    "Xiang Wang",
    "Shuai Yuan",
    "Chenwei Wu",
    "Rong Ge"
  ],
  "categories": [
    "stat.ML",
    "cs.LG"
  ],
  "abstract": "Learning-to-learn (using optimization algorithms to learn a new optimizer) has successfully trained efficient optimizers in practice. This approach relies on meta-gradient descent on a meta-objective based on the trajectory that the optimizer generates. However, there have been few theoretical guarantees on how to avoid meta-gradient explosion/vanishing problems, or on how to train an optimizer with good generalization performance. In this paper, we study the learning-to-learn approach on a simple problem of tuning the step size for quadratic loss. Our results show that although there is a way to design the meta-objective so that the meta-gradient remains polynomially bounded, computing the meta-gradient directly using backpropagation leads to numerical issues that look similar to gradient explosion/vanishing problems. We also characterize when it is necessary to compute the meta-objective on a separate validation set instead of the original training set. Finally, we verify our results empirically and show that a similar phenomenon appears even for more complicated learned optimizers parametrized by neural networks.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2020-06-30T02:59:35.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "learning-to-learn approach",
      "step size",
      "guarantees",
      "avoid meta-gradient explosion/vanishing problems",
      "separate validation set"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}