{ "id": "1711.03953", "version": "v1", "published": "2017-11-10T18:29:00.000Z", "updated": "2017-11-10T18:29:00.000Z", "title": "Breaking the Softmax Bottleneck: A High-Rank RNN Language Model", "authors": [ "Zhilin Yang", "Zihang Dai", "Ruslan Salakhutdinov", "William W. Cohen" ], "categories": [ "cs.CL", "cs.LG" ], "abstract": "We formulate language modeling as a matrix factorization problem, and show that the expressiveness of Softmax-based models (including the majority of neural language models) is limited by a Softmax bottleneck. Given that natural language is highly context-dependent, this further implies that in practice Softmax with distributed word embeddings does not have enough capacity to model natural language. We propose a simple and effective method to address this issue, and improve the state-of-the-art perplexities on Penn Treebank and WikiText-2 to 47.69 and 40.68 respectively.", "revisions": [ { "version": "v1", "updated": "2017-11-10T18:29:00.000Z" } ], "analyses": { "keywords": [ "high-rank rnn language model", "softmax bottleneck", "matrix factorization problem", "neural language models", "model natural language" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }