{ "id": "1806.04391", "version": "v1", "published": "2018-06-12T08:42:55.000Z", "updated": "2018-06-12T08:42:55.000Z", "title": "Qiniu Submission to ActivityNet Challenge 2018", "authors": [ "Xiaoteng Zhang", "Yixin Bao", "Feiyun Zhang", "Kai Hu", "Yicheng Wang", "Liang Zhu", "Qinzhu He", "Yining Lin", "Jie Shao", "Yao Peng" ], "comment": "4 pages, 3 figures, CVPR workshop", "categories": [ "cs.CV" ], "abstract": "In this paper, we introduce our submissions for the tasks of trimmed activity recognition (Kinetics) and trimmed event recognition (Moments in Time) for ActivityNet Challenge 2018. In the two tasks, non-local neural networks and temporal segment networks are implemented as our base models. Multi-modal cues such as RGB image, optical flow and acoustic signal have also been used in our method. We also propose new non-local-based models for further improvement on the recognition accuracy. The final submissions after ensembling the models achieve 83.5% top-1 accuracy and 96.8% top-5 accuracy on the Kinetics validation set, 35.81% top-1 accuracy and 62.59% top-5 accuracy on the MIT validation set.", "revisions": [ { "version": "v1", "updated": "2018-06-12T08:42:55.000Z" } ], "analyses": { "keywords": [ "activitynet challenge", "qiniu submission", "mit validation set", "kinetics validation set", "temporal segment networks" ], "note": { "typesetting": "TeX", "pages": 4, "language": "en", "license": "arXiv", "status": "editable" } } }