{ "id": "1510.05970", "version": "v1", "published": "2015-10-20T17:15:05.000Z", "updated": "2015-10-20T17:15:05.000Z", "title": "Stereo Matching by Training a Convolutional Neural Network to Compare Image Patches", "authors": [ "Jure Žbontar", "Yann LeCun" ], "comment": "Submitted to the Journal of Machine Learning Research", "categories": [ "cs.CV", "cs.LG", "cs.NE" ], "abstract": "We present a method for extracting depth information from a rectified image pair. Our approach focuses on the first stage of many stereo algorithms: the matching cost computation. We approach the problem by learning a similarity measure on small image patches using a convolutional neural network. Training is carried out in a supervised manner by constructing a binary classification data set with examples of similar and dissimilar pairs of patches. We examine two network architectures for this task: one tuned for speed, the other for accuracy. The output of the convolutional neural network is used to initialize the stereo matching cost. A series of post-processing steps follow: cross-based cost aggregation, semiglobal matching, a left-right consistency check, subpixel enhancement, a median filter, and a bilateral filter. We evaluate our method on the KITTI 2012, KITTI 2015, and Middlebury stereo data sets and show that it outperforms other approaches on all three data sets.", "revisions": [ { "version": "v1", "updated": "2015-10-20T17:15:05.000Z" } ], "analyses": { "keywords": [ "convolutional neural network", "image patches", "stereo matching", "middlebury stereo data sets", "binary classification data set" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2015arXiv151005970Z" } } }