{ "id": "1803.04623", "version": "v1", "published": "2018-03-13T05:08:47.000Z", "updated": "2018-03-13T05:08:47.000Z", "title": "Thompson Sampling for Combinatorial Semi-Bandits", "authors": [ "Siwei Wang", "Wei Chen" ], "categories": [ "cs.LG" ], "abstract": "We study the application of the Thompson Sampling (TS) methodology to the stochastic combinatorial multi-armed bandit (CMAB) framework. We analyze the standard TS algorithm for the general CMAB, and obtain the first distribution-dependent regret bound of $O(m\\log T / \\Delta_{\\min}) $ for TS under general CMAB, where $m$ is the number of arms, $T$ is the time horizon, and $\\Delta_{\\min}$ is the minimum gap between the expected reward of the optimal solution and any non-optimal solution. We also show that one can not use an approximate oracle in TS algorithm for even MAB problems. Then we expand the analysis to matroid bandit, a special case of CMAB. Finally, we use some experiments to show the comparison of regrets of CUCB and CTS algorithms.", "revisions": [ { "version": "v1", "updated": "2018-03-13T05:08:47.000Z" } ], "analyses": { "keywords": [ "thompson sampling", "combinatorial semi-bandits", "general cmab", "first distribution-dependent regret bound", "stochastic combinatorial multi-armed bandit" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }