{ "id": "1907.10837", "version": "v1", "published": "2019-07-25T04:48:10.000Z", "updated": "2019-07-25T04:48:10.000Z", "title": "Submission to ActivityNet Challenge 2019: Task B Spatio-temporal Action Localization", "authors": [ "Chunfei Ma", "Joonhyang Choi", "Byeongwon Lee", "Seungji Yang" ], "comment": "4 pages, 2 figures", "categories": [ "cs.CV", "cs.LG" ], "abstract": "This technical report presents an overview of our system proposed for the spatio-temporal action localization (SAL) task in ActivityNet Challenge 2019. Unlike previous two-stream-based works, we focus on exploring the end-to-end trainable architecture using only RGB sequential images. To this end, we employ a previously proposed simple yet effective two-branch network called SlowFast Networks which is capable of capturing both short- and long-term spatiotemporal features. Moreover, to handle the severe class imbalance and overfitting problems, we propose a correlation-preserving data augmentation method and a random label subsampling method which have been proven to be able to reduce overfitting and improve the performance.", "revisions": [ { "version": "v1", "updated": "2019-07-25T04:48:10.000Z" } ], "analyses": { "keywords": [ "spatio-temporal action localization", "activitynet challenge", "submission", "random label subsampling method", "correlation-preserving data augmentation method" ], "note": { "typesetting": "TeX", "pages": 4, "language": "en", "license": "arXiv", "status": "editable" } } }