{
  "id": "1510.05970",
  "version": "v1",
  "published": "2015-10-20T17:15:05.000Z",
  "updated": "2015-10-20T17:15:05.000Z",
  "title": "Stereo Matching by Training a Convolutional Neural Network to Compare Image Patches",
  "authors": [
    "Jure Žbontar",
    "Yann LeCun"
  ],
  "comment": "Submitted to the Journal of Machine Learning Research",
  "categories": [
    "cs.CV",
    "cs.LG",
    "cs.NE"
  ],
  "abstract": "We present a method for extracting depth information from a rectified image pair. Our approach focuses on the first stage of many stereo algorithms: the matching cost computation. We approach the problem by learning a similarity measure on small image patches using a convolutional neural network. Training is carried out in a supervised manner by constructing a binary classification data set with examples of similar and dissimilar pairs of patches. We examine two network architectures for this task: one tuned for speed, the other for accuracy. The output of the convolutional neural network is used to initialize the stereo matching cost. A series of post-processing steps follow: cross-based cost aggregation, semiglobal matching, a left-right consistency check, subpixel enhancement, a median filter, and a bilateral filter. We evaluate our method on the KITTI 2012, KITTI 2015, and Middlebury stereo data sets and show that it outperforms other approaches on all three data sets.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2015-10-20T17:15:05.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "convolutional neural network",
      "image patches",
      "stereo matching",
      "middlebury stereo data sets",
      "binary classification data set"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2015arXiv151005970Z"
    }
  }
}