{ "id": "2103.08493", "version": "v1", "published": "2021-03-15T16:10:23.000Z", "updated": "2021-03-15T16:10:23.000Z", "title": "How Many Data Points is a Prompt Worth?", "authors": [ "Teven Le Scao", "Alexander M. Rush" ], "comment": "NAACL HLT 2021", "categories": [ "cs.LG" ], "abstract": "When fine-tuning pretrained models for classification, researchers either use a generic model head or a task-specific prompt for prediction. Proponents of prompting have argued that prompts provide a method for injecting task-specific guidance, which is beneficial in low-data regimes. We aim to quantify this benefit through rigorous testing of prompts in a fair setting: comparing prompted and head-based fine-tuning in equal conditions across many tasks and data sizes. By controlling for many sources of advantage, we find that prompting does indeed provide a benefit, and that this benefit can be quantified per task. Results show that prompting is often worth 100s of data points on average across classification tasks.", "revisions": [ { "version": "v1", "updated": "2021-03-15T16:10:23.000Z" } ], "analyses": { "keywords": [ "data points", "prompt worth", "generic model head", "data sizes", "equal conditions" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }