{ "id": "2210.00701", "version": "v1", "published": "2022-10-03T03:48:26.000Z", "updated": "2022-10-03T03:48:26.000Z", "title": "Near-Optimal Deployment Efficiency in Reward-Free Reinforcement Learning with Linear Function Approximation", "authors": [ "Dan Qiao", "Yu-Xiang Wang" ], "comment": "47 pages", "categories": [ "cs.LG", "cs.AI", "stat.ML" ], "abstract": "We study the problem of deployment efficient reinforcement learning (RL) with linear function approximation under the \\emph{reward-free} exploration setting. This is a well-motivated problem because deploying new policies is costly in real-life RL applications. Under the linear MDP setting with feature dimension $d$ and planning horizon $H$, we propose a new algorithm that collects at most $\\widetilde{O}(\\frac{d^2H^5}{\\epsilon^2})$ trajectories within $H$ deployments to identify $\\epsilon$-optimal policy for any (possibly data-dependent) choice of reward functions. To the best of our knowledge, our approach is the first to achieve optimal deployment complexity and optimal $d$ dependence in sample complexity at the same time, even if the reward is known ahead of time. Our novel techniques include an exploration-preserving policy discretization and a generalized G-optimal experiment design, which could be of independent interest. Lastly, we analyze the related problem of regret minimization in low-adaptive RL and provide information-theoretic lower bounds for switching cost and batch complexity.", "revisions": [ { "version": "v1", "updated": "2022-10-03T03:48:26.000Z" } ], "analyses": { "keywords": [ "linear function approximation", "near-optimal deployment efficiency", "reward-free reinforcement learning", "achieve optimal deployment complexity", "deployment efficient reinforcement" ], "note": { "typesetting": "TeX", "pages": 47, "language": "en", "license": "arXiv", "status": "editable" } } }