{ "id": "2109.04912", "version": "v1", "published": "2021-09-10T14:49:44.000Z", "updated": "2021-09-10T14:49:44.000Z", "title": "ReasonBERT: Pre-trained to Reason with Distant Supervision", "authors": [ "Xiang Deng", "Yu Su", "Alyssa Lees", "You Wu", "Cong Yu", "Huan Sun" ], "comment": "Accepted to EMNLP'2021. Our code and pre-trained models are available at https://github.com/sunlab-osu/ReasonBERT", "categories": [ "cs.CL", "cs.AI", "cs.LG" ], "abstract": "We present ReasonBert, a pre-training method that augments language models with the ability to reason over long-range relations and multiple, possibly hybrid contexts. Unlike existing pre-training methods that only harvest learning signals from local contexts of naturally occurring texts, we propose a generalized notion of distant supervision to automatically connect multiple pieces of text and tables to create pre-training examples that require long-range reasoning. Different types of reasoning are simulated, including intersecting multiple pieces of evidence, bridging from one piece of evidence to another, and detecting unanswerable cases. We conduct a comprehensive evaluation on a variety of extractive question answering datasets ranging from single-hop to multi-hop and from text-only to table-only to hybrid that require various reasoning capabilities and show that ReasonBert achieves remarkable improvement over an array of strong baselines. Few-shot experiments further demonstrate that our pre-training method substantially improves sample efficiency.", "revisions": [ { "version": "v1", "updated": "2021-09-10T14:49:44.000Z" } ], "analyses": { "keywords": [ "distant supervision", "reasonbert", "question answering datasets ranging", "pre-training method", "automatically connect multiple pieces" ], "tags": [ "github project" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }