{ "id": "2109.04912", "version": "v1", "published": "2021-09-10T14:49:44.000Z", "updated": "2021-09-10T14:49:44.000Z", "title": "ReasonBERT: Pre-trained to Reason with Distant Supervision", "authors": [ "Xiang Deng", "Yu Su", "Alyssa Lees", "You Wu", "Cong Yu", "Huan Sun" ], "comment": "Accepted to EMNLP'2021. Our code and pre-trained models are available at https://github.com/sunlab-osu/ReasonBERT", "categories": [ "cs.CL", "cs.AI", "cs.LG" ], "abstract": "We present ReasonBert, a pre-training method that augments language models with the ability to reason over long-range relations and multiple, possibly hybrid contexts. Unlike existing pre-training methods that only harvest learning signals from local contexts of naturally occurring texts, we propose a generalized notion of distant supervision to automatically connect multiple pieces of text and tables to create pre-training examples that require long-range reasoning. Different types of reasoning are simulated, including intersecting multiple pieces of evidence, bridging from one piece of evidence to another, and detecting unanswerable cases. We conduct a comprehensive evaluation on a variety of extractive question answering datasets ranging from single-hop to multi-hop and from text-only to table-only to hybrid that require various reasoning capabilities and show that ReasonBert achieves remarkable improvement over an array of strong baselines. Few-shot experiments further demonstrate that our pre-training method substantially improves sample efficiency.", "revisions": [ { "version": "v1", "updated": "2021-09-10T14:49:44.000Z" } ], "analyses": { "keywords": [ "distant supervision", "reasonbert", "question answering datasets ranging", "pre-training method", "automatically connect multiple pieces" ], "tags": [ "github project" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }