{
  "id": "1110.4322",
  "version": "v3",
  "published": "2011-10-19T15:57:27.000Z",
  "updated": "2012-02-14T16:14:39.000Z",
  "title": "An Optimal Algorithm for Linear Bandits",
  "authors": [
    "Nicolò Cesa-Bianchi",
    "Sham Kakade"
  ],
  "comment": "This paper is superseded by S. Bubeck, N. Cesa-Bianchi, and S.M. Kakade, \"Towards minimax policies for online linear optimization with bandit feedback\"",
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "We provide the first algorithm for online bandit linear optimization whose regret after T rounds is of order sqrt{Td ln N} on any finite class X of N actions in d dimensions, and of order d*sqrt{T} (up to log factors) when X is infinite. These bounds are not improvable in general. The basic idea uses tools from convex geometry to construct what is essentially an optimal exploration basis. We also present an application to a model of linear bandits with expert advice. Interestingly, these results show that bandit linear optimization with expert advice in d dimensions is no more difficult (in terms of the achievable regret) than the online d-armed bandit problem with expert advice (where EXP4 is optimal).",
  "revisions": [
    {
      "version": "v3",
      "updated": "2012-02-14T16:14:39.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "linear bandits",
      "optimal algorithm",
      "expert advice",
      "online bandit linear optimization",
      "basic idea utilizes tools"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2011arXiv1110.4322C"
    }
  }
}
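The abstract describes an exponential-weights method for bandit linear optimization over a finite action class. Below is a minimal illustrative sketch of an Exp2-style algorithm of this kind, with one major simplification: the exploration distribution is uniform over the actions, whereas the paper's contribution is an essentially optimal exploration basis built with convex-geometric tools, which this sketch does not implement. The name exp2_bandit, the loss_fn callback, and the parameters eta and gamma are hypothetical choices for illustration, not anything specified by the record.

```python
# A minimal sketch of Exp2-style bandit linear optimization over a finite
# action set X of N vectors in R^d. NOTE (assumption): uniform exploration
# stands in for the paper's optimal exploration basis, so this sketch does
# not achieve the sqrt{Td ln N} bound in general.
import numpy as np

def exp2_bandit(actions, loss_fn, T, eta=0.05, gamma=0.1, rng=None):
    """actions: (N, d) array of action vectors.
    loss_fn(t, a): scalar bandit feedback <a, z_t> for round t (hypothetical callback).
    Returns the list of chosen action indices."""
    rng = rng if rng is not None else np.random.default_rng(0)
    N, d = actions.shape
    log_w = np.zeros(N)               # log-weights, for numerical stability
    explore = np.full(N, 1.0 / N)     # uniform exploration (simplification)
    chosen = []
    for t in range(T):
        q = np.exp(log_w - log_w.max())
        q /= q.sum()
        p = (1 - gamma) * q + gamma * explore   # mixed sampling distribution
        i = rng.choice(N, p=p)
        chosen.append(i)
        loss = loss_fn(t, actions[i])           # only <a_t, z_t> is observed
        # Unbiased estimate of the unknown loss vector z_t:
        #   z_hat = P^{+} a_t * loss, where P = E_{a~p}[a a^T]
        P = (actions * p[:, None]).T @ actions
        z_hat = np.linalg.pinv(P) @ actions[i] * loss
        log_w -= eta * (actions @ z_hat)        # exponential-weights update
    return chosen
```

As a sanity check under these assumptions, taking the actions to be the standard basis vectors of R^d reduces the sketch to an ordinary d-armed bandit with exponential weights.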