{
  "id": "1607.03476",
  "version": "v1",
  "published": "2016-07-12T19:45:12.000Z",
  "updated": "2016-07-12T19:45:12.000Z",
  "title": "End-to-end training of object class detectors for mean average precision",
  "authors": [
    "Paul Henderson",
    "Vittorio Ferrari"
  ],
  "categories": [
    "cs.CV"
  ],
  "abstract": "We present a method for training CNN-based object class detectors directly using mean average precision (mAP) as the training loss, in a truly end-to-end fashion that includes non-maximum suppression (NMS) at training time. This contrasts with the traditional approach of training a CNN for a window classification loss, then applying NMS only at test time, when mAP is used as the evaluation metric in place of classification accuracy. However, mAP following NMS forms a piecewise-constant structured loss over thousands of windows, with gradients that do not convey useful information for gradient descent. Hence, we define new, general gradient-like quantities for piecewise constant functions, which have wide applicability. We describe how to calculate these efficiently for mAP following NMS, enabling us to train a detector based on Fast R-CNN directly for mAP. This model achieves equivalent performance to the standard Fast R-CNN on the PASCAL VOC 2007 and 2012 datasets, while being conceptually more appealing as the very same model and loss are used at both training and test time.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2016-07-12T19:45:12.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "mean average precision",
      "cnn-based object class detectors",
      "end-to-end training",
      "model achieves equivalent performance",
      "fast r-cnn"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}