{
  "id": "1110.4322",
  "version": "v3",
  "published": "2011-10-19T15:57:27.000Z",
  "updated": "2012-02-14T16:14:39.000Z",
  "title": "An Optimal Algorithm for Linear Bandits",
  "authors": [
    "Nicolò Cesa-Bianchi",
    "Sham Kakade"
  ],
  "comment": "This paper is superseded by S. Bubeck, N. Cesa-Bianchi, and S.M. Kakade, \"Towards minimax policies for online linear optimization with bandit feedback\"",
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "We provide the first algorithm for online bandit linear optimization whose regret after T rounds is of order sqrt{Td ln N} on any finite class X of N actions in d dimensions, and of order d*sqrt{T} (up to log factors) when X is infinite. These bounds are not improvable in general. The basic idea uses tools from convex geometry to construct what is essentially an optimal exploration basis. We also present an application to a model of linear bandits with expert advice. Interestingly, these results show that bandit linear optimization with expert advice in d dimensions is no more difficult (in terms of the achievable regret) than the online d-armed bandit problem with expert advice (where EXP4 is optimal).",
  "revisions": [
    {
      "version": "v3",
      "updated": "2012-02-14T16:14:39.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "linear bandits",
      "optimal algorithm",
      "expert advice",
      "online bandit linear optimization",
      "basic idea utilizes tools"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2011arXiv1110.4322C"
    }
  }
}
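The abstract describes an exponential-weights method for bandit linear optimization over a finite action class. Below is a minimal illustrative sketch of an Exp2-style algorithm of this kind, with one major simplification: the exploration distribution is uniform over the actions, whereas the paper's contribution is an essentially optimal exploration basis built with convex-geometric tools, which this sketch does not implement. The name exp2_bandit, the loss_fn callback, and the parameters eta and gamma are hypothetical choices for illustration, not anything specified by the record.

```python
# A minimal sketch of Exp2-style bandit linear optimization over a finite
# action set X of N vectors in R^d. NOTE (assumption): uniform exploration
# stands in for the paper's optimal exploration basis, so this sketch does
# not achieve the sqrt{Td ln N} bound in general.
import numpy as np

def exp2_bandit(actions, loss_fn, T, eta=0.05, gamma=0.1, rng=None):
    """actions: (N, d) array of action vectors.
    loss_fn(t, a): scalar bandit feedback <a, z_t> for round t (hypothetical callback).
    Returns the list of chosen action indices."""
    rng = rng if rng is not None else np.random.default_rng(0)
    N, d = actions.shape
    log_w = np.zeros(N)               # log-weights, for numerical stability
    explore = np.full(N, 1.0 / N)     # uniform exploration (simplification)
    chosen = []
    for t in range(T):
        q = np.exp(log_w - log_w.max())
        q /= q.sum()
        p = (1 - gamma) * q + gamma * explore   # mixed sampling distribution
        i = rng.choice(N, p=p)
        chosen.append(i)
        loss = loss_fn(t, actions[i])           # only <a_t, z_t> is observed
        # Unbiased estimate of the unknown loss vector z_t:
        #   z_hat = P^{+} a_t * loss, where P = E_{a~p}[a a^T]
        P = (actions * p[:, None]).T @ actions
        z_hat = np.linalg.pinv(P) @ actions[i] * loss
        log_w -= eta * (actions @ z_hat)        # exponential-weights update
    return chosen
```

As a sanity check under these assumptions, taking the actions to be the standard basis vectors of R^d reduces the sketch to an ordinary d-armed bandit with exponential weights.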