{ "id": "1807.07547", "version": "v1", "published": "2018-07-19T17:33:30.000Z", "updated": "2018-07-19T17:33:30.000Z", "title": "Partial recovery bounds for clustering with the relaxed $K$means", "authors": [ "Christophe Giraud", "Nicolas Verzelen" ], "comment": "33 pages", "categories": [ "math.ST", "cs.LG", "stat.TH" ], "abstract": "We investigate the clustering performance of the relaxed $K$means in the setting of the sub-Gaussian Mixture Model (sGMM) and the Stochastic Block Model (SBM). After identifying the appropriate signal-to-noise ratio (SNR), we prove that the misclassification error decays exponentially fast with respect to this SNR. These partial recovery bounds for the relaxed $K$means improve upon results currently known in the sGMM setting. In the SBM setting, applying the relaxed $K$means SDP allows us to handle general connection probabilities whereas other SDPs investigated in the literature are restricted to the assortative case (where within-group probabilities are larger than between-group probabilities). Again, this partial recovery bound complements the state-of-the-art results. Altogether, these results put forward the versatility of the relaxed $K$means.", "revisions": [ { "version": "v1", "updated": "2018-07-19T17:33:30.000Z" } ], "analyses": { "subjects": [ "62H30", "68T10" ], "keywords": [ "misclassification error decay exponentially fast", "handle general connection probabilities", "group probabilities", "partial recovery bound complements", "sub-gaussian mixture model" ], "note": { "typesetting": "TeX", "pages": 33, "language": "en", "license": "arXiv", "status": "editable" } } }