{ "id": "1805.09137", "version": "v1", "published": "2018-05-13T19:13:16.000Z", "updated": "2018-05-13T19:13:16.000Z", "title": "Image Captioning", "authors": [ "Vikram Mullachery", "Vishal Motwani" ], "comment": "arXiv admin note: text overlap with arXiv:1609.06647 by other authors", "categories": [ "cs.CV", "cs.AI" ], "abstract": "This paper discusses and demonstrates the outcomes from our experimentation on Image Captioning. Image captioning is a much more involved task than image recognition or classification, because of the additional challenge of recognizing the interdependence between the objects/concepts in the image and the creation of a succinct sentential narration. Experiments on several labeled datasets show the accuracy of the model and the fluency of the language it learns solely from image descriptions. As a toy application, we apply image captioning to create video captions, and we advance a few hypotheses on the challenges we encountered.", "revisions": [ { "version": "v1", "updated": "2018-05-13T19:13:16.000Z" } ], "analyses": { "keywords": [ "image captioning", "create video captions", "succinct sentential narration", "toy application", "image descriptions" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }