{ "id": "1502.02763", "version": "v1", "published": "2015-02-10T02:56:04.000Z", "updated": "2015-02-10T02:56:04.000Z", "title": "Cascading Bandits", "authors": [ "Branislav Kveton", "Csaba Szepesvari", "Zheng Wen", "Azin Ashkan" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "The cascade model is a well-established model of user interaction with content. In this work, we propose cascading bandits, a learning variant of the model where the objective is to learn $K$ most attractive items out of $L$ ground items. We cast the problem as a stochastic combinatorial bandit with a non-linear reward function and partially observed weights of items. Both of these are challenging in the context of combinatorial bandits. We propose two computationally-efficient algorithms for our problem, CascadeUCB1 and CascadeKL-UCB, and prove gap-dependent upper bounds on their regret. We also derive a lower bound for cascading bandits and show that it matches the upper bound of CascadeKL-UCB up to a logarithmic factor. Finally, we evaluate our algorithms on synthetic problems. Our experiments demonstrate that the algorithms perform well and robustly even when our modeling assumptions are violated.", "revisions": [ { "version": "v1", "updated": "2015-02-10T02:56:04.000Z" } ], "analyses": { "keywords": [ "cascading bandits", "stochastic combinatorial bandit", "gap-dependent upper bounds", "cascadekl-ucb", "non-linear reward function" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }