{
  "id": "1810.03024",
  "version": "v1",
  "published": "2018-10-06T17:04:14.000Z",
  "updated": "2018-10-06T17:04:14.000Z",
  "title": "Learning to Optimize under Non-Stationarity",
  "authors": [
    "Wang Chi Cheung",
    "David Simchi-Levi",
    "Ruihao Zhu"
  ],
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "We introduce algorithms that achieve state-of-the-art \\emph{dynamic regret} bounds for non-stationary linear stochastic bandits setting. It captures natural applications such as dynamic pricing and ads allocation in a changing environment. We show how the difficulty posed by the (possibly adversarial) non-stationarity can be overcome by a novel marriage between stochastic and adversarial bandits learning algorithms. Defining $d,B_T,$ and $T$ as the problem dimension, the \\emph{variation budget}, and the total time horizon, respectively, our main contributions are the tuned Sliding Window UCB (\\texttt{SW-UCB}) algorithm with optimal $\\widetilde{O}(d^{2/3}(B_T+1)^{1/3}T^{2/3})$ dynamic regret, and the tuning free bandits-over-bandits (\\texttt{BOB}) framework built on top of the \\texttt{SW-UCB} algorithm with best $\\widetilde{O}(d^{2/3}(B_T+1)^{1/4}T^{3/4})$ dynamic regret.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2018-10-06T17:04:14.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "non-stationarity",
      "dynamic regret",
      "non-stationary linear stochastic bandits setting",
      "captures natural applications",
      "total time horizon"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}