{ "id": "1810.06530", "version": "v1", "published": "2018-10-15T17:30:53.000Z", "updated": "2018-10-15T17:30:53.000Z", "title": "Successor Uncertainties: exploration and uncertainty in temporal difference learning", "authors": [ "David Janz", "Jiri Hron", "José Miguel Hernández-Lobato", "Katja Hofmann", "Sebastian Tschiatschek" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We consider the problem of balancing exploration and exploitation in sequential decision making problems. To explore efficiently, it is vital to consider the uncertainty over all consequences of a decision, and not just those that follow immediately; the uncertainties involved need to be propagated according to the dynamics of the problem. To this end, we develop Successor Uncertainties, a probabilistic model for the state-action value function of a Markov Decision Process that propagates uncertainties in a coherent and scalable way. We relate our approach to other classical and contemporary methods for exploration and present an empirical analysis.", "revisions": [ { "version": "v1", "updated": "2018-10-15T17:30:53.000Z" } ], "analyses": { "keywords": [ "uncertainty", "successor uncertainties", "temporal difference learning", "exploration", "markov decision process" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }