{ "id": "2206.10861", "version": "v1", "published": "2022-06-22T06:11:07.000Z", "updated": "2022-06-22T06:11:07.000Z", "title": "UniCon+: ICTCAS-UCAS Submission to the AVA-ActiveSpeaker Task at ActivityNet Challenge 2022", "authors": [ "Yuanhang Zhang", "Susan Liang", "Shuang Yang", "Shiguang Shan" ], "comment": "5 pages, 3 figures; technical report for AVA Challenge (see https://research.google.com/ava/challenge.html) at the International Challenge on Activity Recognition (ActivityNet), CVPR 2022", "categories": [ "cs.CV", "cs.SD", "eess.AS" ], "abstract": "This report presents a brief description of our winning solution to the AVA Active Speaker Detection (ASD) task at ActivityNet Challenge 2022. Our underlying model UniCon+ continues to build on our previous work, the Unified Context Network (UniCon) and Extended UniCon which are designed for robust scene-level ASD. We augment the architecture with a simple GRU-based module that allows information of recurring identities to flow across scenes through read and update operations. We report a best result of 94.47% mAP on the AVA-ActiveSpeaker test set, which continues to rank first on this year's challenge leaderboard and significantly pushes the state-of-the-art.", "revisions": [ { "version": "v1", "updated": "2022-06-22T06:11:07.000Z" } ], "analyses": { "keywords": [ "activitynet challenge", "ictcas-ucas submission", "ava-activespeaker task", "ava-activespeaker test set", "ava active speaker detection" ], "note": { "typesetting": "TeX", "pages": 5, "language": "en", "license": "arXiv", "status": "editable" } } }