{ "id": "2009.13077", "version": "v1", "published": "2020-09-28T04:57:23.000Z", "updated": "2020-09-28T04:57:23.000Z", "title": "Distribution Matching for Crowd Counting", "authors": [ "Boyu Wang", "Huidong Liu", "Dimitris Samaras", "Minh Hoai" ], "comment": "NeurIPS 2020", "categories": [ "cs.CV" ], "abstract": "In crowd counting, each training image contains multiple people, where each person is annotated by a dot. Existing crowd counting methods need to use a Gaussian to smooth each annotated dot or to estimate the likelihood of every pixel given the annotated point. In this paper, we show that imposing Gaussians to annotations hurts generalization performance. Instead, we propose to use Distribution Matching for crowd COUNTing (DM-Count). In DM-Count, we use Optimal Transport (OT) to measure the similarity between the normalized predicted density map and the normalized ground truth density map. To stabilize OT computation, we include a Total Variation loss in our model. We show that the generalization error bound of DM-Count is tighter than that of the Gaussian smoothed methods. In terms of Mean Absolute Error, DM-Count outperforms the previous state-of-the-art methods by a large margin on two large-scale counting datasets, UCF-QNRF and NWPU, and achieves the state-of-the-art results on the ShanghaiTech and UCF-CC50 datasets. Notably, DM-Count ranked first on the leaderboard for the NWPU benchmark, reducing the error of the state-of-the-art published result by approximately 16%. Code is available at https://github.com/cvlab-stonybrook/DM-Count.", "revisions": [ { "version": "v1", "updated": "2020-09-28T04:57:23.000Z" } ], "analyses": { "keywords": [ "distribution matching", "crowd counting methods", "normalized ground truth density map", "training image contains multiple people", "annotations hurts generalization performance" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }