{
  "id": "1711.03953",
  "version": "v1",
  "published": "2017-11-10T18:29:00.000Z",
  "updated": "2017-11-10T18:29:00.000Z",
  "title": "Breaking the Softmax Bottleneck: A High-Rank RNN Language Model",
  "authors": [
    "Zhilin Yang",
    "Zihang Dai",
    "Ruslan Salakhutdinov",
    "William W. Cohen"
  ],
  "categories": [
    "cs.CL",
    "cs.LG"
  ],
  "abstract": "We formulate language modeling as a matrix factorization problem, and show that the expressiveness of Softmax-based models (including the majority of neural language models) is limited by a Softmax bottleneck. Given that natural language is highly context-dependent, this further implies that in practice Softmax with distributed word embeddings does not have enough capacity to model natural language. We propose a simple and effective method to address this issue, and improve the state-of-the-art perplexities on Penn Treebank and WikiText-2 to 47.69 and 40.68 respectively.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2017-11-10T18:29:00.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "high-rank rnn language model",
      "softmax bottleneck",
      "matrix factorization problem",
      "neural language models",
      "model natural language"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}