{ "id": "1502.02763", "version": "v1", "published": "2015-02-10T02:56:04.000Z", "updated": "2015-02-10T02:56:04.000Z", "title": "Cascading Bandits", "authors": [ "Branislav Kveton", "Csaba Szepesvari", "Zheng Wen", "Azin Ashkan" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "The cascade model is a well-established model of user interaction with content. In this work, we propose cascading bandits, a learning variant of the model where the objective is to learn $K$ most attractive items out of $L$ ground items. We cast the problem as a stochastic combinatorial bandit with a non-linear reward function and partially observed weights of items. Both of these are challenging in the context of combinatorial bandits. We propose two computationally-efficient algorithms for our problem, CascadeUCB1 and CascadeKL-UCB, and prove gap-dependent upper bounds on their regret. We also derive a lower bound for cascading bandits and show that it matches the upper bound of CascadeKL-UCB up to a logarithmic factor. Finally, we evaluate our algorithms on synthetic problems. Our experiments demonstrate that the algorithms perform well and robustly even when our modeling assumptions are violated.", "revisions": [ { "version": "v1", "updated": "2015-02-10T02:56:04.000Z" } ], "analyses": { "keywords": [ "cascading bandits", "stochastic combinatorial bandit", "gap-dependent upper bounds", "cascadekl-ucb", "non-linear reward function" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }