{ "id": "2204.10782", "version": "v1", "published": "2022-04-22T15:56:43.000Z", "updated": "2022-04-22T15:56:43.000Z", "title": "On Feature Learning in Neural Networks with Global Convergence Guarantees", "authors": [ "Zhengdao Chen", "Eric Vanden-Eijnden", "Joan Bruna" ], "comment": "Accepted by the 10th International Conference on Learning Representations (ICLR 2022)", "categories": [ "cs.LG", "math.OC", "math.PR", "stat.ML" ], "abstract": "We study the optimization of wide neural networks (NNs) via gradient flow (GF) in setups that allow feature learning while admitting non-asymptotic global convergence guarantees. First, for wide shallow NNs under the mean-field scaling and with a general class of activation functions, we prove that when the input dimension is no less than the size of the training set, the training loss converges to zero at a linear rate under GF. Building upon this analysis, we study a model of wide multi-layer NNs whose second-to-last layer is trained via GF, for which we also prove a linear-rate convergence of the training loss to zero, but regardless of the input dimension. We also show empirically that, unlike in the Neural Tangent Kernel (NTK) regime, our multi-layer model exhibits feature learning and can achieve better generalization performance than its NTK counterpart.", "revisions": [ { "version": "v1", "updated": "2022-04-22T15:56:43.000Z" } ], "analyses": { "keywords": [ "feature learning", "input dimension", "achieve better generalization performance", "admitting non-asymptotic global convergence guarantees", "training loss" ], "tags": [ "conference paper" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }