{ "id": "1803.04623", "version": "v1", "published": "2018-03-13T05:08:47.000Z", "updated": "2018-03-13T05:08:47.000Z", "title": "Thompson Sampling for Combinatorial Semi-Bandits", "authors": [ "Siwei Wang", "Wei Chen" ], "categories": [ "cs.LG" ], "abstract": "We study the application of the Thompson Sampling (TS) methodology to the stochastic combinatorial multi-armed bandit (CMAB) framework. We analyze the standard TS algorithm for the general CMAB, and obtain the first distribution-dependent regret bound of $O(m\\log T / \\Delta_{\\min}) $ for TS under general CMAB, where $m$ is the number of arms, $T$ is the time horizon, and $\\Delta_{\\min}$ is the minimum gap between the expected reward of the optimal solution and any non-optimal solution. We also show that one can not use an approximate oracle in TS algorithm for even MAB problems. Then we expand the analysis to matroid bandit, a special case of CMAB. Finally, we use some experiments to show the comparison of regrets of CUCB and CTS algorithms.", "revisions": [ { "version": "v1", "updated": "2018-03-13T05:08:47.000Z" } ], "analyses": { "keywords": [ "thompson sampling", "combinatorial semi-bandits", "general cmab", "first distribution-dependent regret bound", "stochastic combinatorial multi-armed bandit" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }