{ "id": "1810.03024", "version": "v1", "published": "2018-10-06T17:04:14.000Z", "updated": "2018-10-06T17:04:14.000Z", "title": "Learning to Optimize under Non-Stationarity", "authors": [ "Wang Chi Cheung", "David Simchi-Levi", "Ruihao Zhu" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We introduce algorithms that achieve state-of-the-art \\emph{dynamic regret} bounds for non-stationary linear stochastic bandits setting. It captures natural applications such as dynamic pricing and ads allocation in a changing environment. We show how the difficulty posed by the (possibly adversarial) non-stationarity can be overcome by a novel marriage between stochastic and adversarial bandits learning algorithms. Defining $d,B_T,$ and $T$ as the problem dimension, the \\emph{variation budget}, and the total time horizon, respectively, our main contributions are the tuned Sliding Window UCB (\\texttt{SW-UCB}) algorithm with optimal $\\widetilde{O}(d^{2/3}(B_T+1)^{1/3}T^{2/3})$ dynamic regret, and the tuning free bandits-over-bandits (\\texttt{BOB}) framework built on top of the \\texttt{SW-UCB} algorithm with best $\\widetilde{O}(d^{2/3}(B_T+1)^{1/4}T^{3/4})$ dynamic regret.", "revisions": [ { "version": "v1", "updated": "2018-10-06T17:04:14.000Z" } ], "analyses": { "keywords": [ "non-stationarity", "dynamic regret", "non-stationary linear stochastic bandits setting", "captures natural applications", "total time horizon" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }