% arXiv preprint 1712.05652 (primary class cs.LG).
% Typed as misc rather than article: this record carries no journal venue,
% and the article type requires a journal field in classic BibTeX styles.
% The eprint fields identify the arXiv record; url is kept as a fallback link.
@misc{1712.05652,
  author        = {Lindsey, Jack},
  title         = {Pre-training Attention Mechanisms},
  year          = {2017},
  eprint        = {1712.05652},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {http://arxiv.org/abs/1712.05652},
}