{ "id": "2403.08364", "version": "v1", "published": "2024-03-13T09:24:59.000Z", "updated": "2024-03-13T09:24:59.000Z", "title": "Decoupled Federated Learning on Long-Tailed and Non-IID data with Feature Statistics", "authors": [ "Zhuoxin Chen", "Zhenyu Wu", "Yang Ji" ], "categories": [ "cs.LG", "cs.AI" ], "abstract": "Federated learning is designed to enhance data security and privacy, but faces challenges when dealing with heterogeneous data in long-tailed and non-IID distributions. This paper explores an overlooked scenario where tail classes are sparsely distributed over a few clients, causing the models trained with these classes to have a lower probability of being selected during client aggregation, leading to slower convergence rates and poorer model performance. To address this issue, we propose a two-stage Decoupled Federated learning framework using Feature Statistics (DFL-FS). In the first stage, the server estimates the client's class coverage distributions through masked local feature statistics clustering to select models for aggregation to accelerate convergence and enhance feature learning without privacy leakage. In the second stage, DFL-FS employs federated feature regeneration based on global feature statistics and utilizes resampling and weighted covariance to calibrate the global classifier to enhance the model's adaptability to long-tailed data distributions. We conducted experiments on CIFAR10-LT and CIFAR100-LT datasets with various long-tailed rates. The results demonstrate that our method outperforms state-of-the-art methods in both accuracy and convergence rate.", "revisions": [ { "version": "v1", "updated": "2024-03-13T09:24:59.000Z" } ], "analyses": { "keywords": [ "non-iid data", "local feature statistics clustering", "decoupled federated learning framework", "method outperforms state-of-the-art methods", "dfl-fs employs federated feature regeneration" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }