{ "id": "1405.3229", "version": "v1", "published": "2014-05-13T16:51:54.000Z", "updated": "2014-05-13T16:51:54.000Z", "title": "Rate of Convergence and Error Bounds for LSTD($λ$)", "authors": [ "Manel Tagorti", "Bruno Scherrer" ], "comment": "(2014)", "categories": [ "cs.LG", "cs.AI", "math.OC", "math.ST", "stat.TH" ], "abstract": "We consider LSTD($\\lambda$), the least-squares temporal-difference algorithm with eligibility traces algorithm proposed by Boyan (2002). It computes a linear approximation of the value function of a fixed policy in a large Markov Decision Process. Under a $\\beta$-mixing assumption, we derive, for any value of $\\lambda \\in (0,1)$, a high-probability estimate of the rate of convergence of this algorithm to its limit. We deduce a high-probability bound on the error of this algorithm, that extends (and slightly improves) that derived by Lazaric et al. (2012) in the specific case where $\\lambda=0$. In particular, our analysis sheds some light on the choice of $\\lambda$ with respect to the quality of the chosen linear space and the number of samples, that complies with simulations.", "revisions": [ { "version": "v1", "updated": "2014-05-13T16:51:54.000Z" } ], "analyses": { "keywords": [ "error bounds", "convergence", "large markov decision process", "eligibility traces algorithm", "least-squares temporal-difference algorithm" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2014arXiv1405.3229T" } } }