{ "id": "1602.08465", "version": "v1", "published": "2016-02-26T20:10:27.000Z", "updated": "2016-02-26T20:10:27.000Z", "title": "Seq-NMS for Video Object Detection", "authors": [ "Wei Han", "Pooya Khorrami", "Tom Le Paine", "Prajit Ramachandran", "Mohammad Babaeizadeh", "Honghui Shi", "Jiana Li", "Shuicheng Yan", "Thomas S. Huang" ], "comment": "Technical Report for Imagenet VID Competition 2015", "categories": [ "cs.CV" ], "abstract": "Video object detection is challenging because objects that are easily detected in one frame may be difficult to detect in another frame within the same clip. Recently, there have been major advances for doing object detection in a single image. These methods typically contain three phases: (i) object proposal generation (ii) object classification and (iii) post-processing. We propose a modification of the post-processing phase that uses high-scoring object detections from nearby frames to boost scores of weaker detections within the same clip. We show that our method obtains superior results to state-of-the-art single image object detection techniques. Our method placed 3rd in the video object detection (VID) task of the ImageNet Large Scale Visual Recognition Challenge 2015 (ILSVRC2015).", "revisions": [ { "version": "v1", "updated": "2016-02-26T20:10:27.000Z" } ], "analyses": { "keywords": [ "video object detection", "large scale visual recognition challenge", "state-of-the-art single image object detection", "imagenet large scale visual recognition", "single image object detection techniques" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2016arXiv160208465H" } } }