{ "id": "2305.03784", "version": "v1", "published": "2023-05-05T18:34:49.000Z", "updated": "2023-05-05T18:34:49.000Z", "title": "Neural Exploitation and Exploration of Contextual Bandits", "authors": [ "Yikun Ban", "Yuchen Yan", "Arindam Banerjee", "Jingrui He" ], "comment": "Journal Version of EE-Net. arXiv admin note: substantial text overlap with arXiv:2110.03177", "categories": [ "cs.LG" ], "abstract": "In this paper, we study utilizing neural networks for the exploitation and exploration of contextual multi-armed bandits. Contextual multi-armed bandits have been studied for decades with various applications. To solve the exploitation-exploration trade-off in bandits, there are three main techniques: epsilon-greedy, Thompson Sampling (TS), and Upper Confidence Bound (UCB). In recent literature, a series of neural bandit algorithms have been proposed to adapt to the non-linear reward function, combined with TS or UCB strategies for exploration. In this paper, instead of calculating a large-deviation based statistical bound for exploration like previous methods, we propose, ``EE-Net,'' a novel neural-based exploitation and exploration strategy. In addition to using a neural network (Exploitation network) to learn the reward function, EE-Net uses another neural network (Exploration network) to adaptively learn the potential gains compared to the currently estimated reward for exploration. We provide an instance-based $\\widetilde{\\mathcal{O}}(\\sqrt{T})$ regret upper bound for EE-Net and show that EE-Net outperforms related linear and neural contextual bandit baselines on real-world datasets.", "revisions": [ { "version": "v1", "updated": "2023-05-05T18:34:49.000Z" } ], "analyses": { "keywords": [ "exploration", "neural exploitation", "contextual multi-armed bandits", "neural contextual bandit baselines", "regret upper bound" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }