{
  "id": "2006.11693",
  "version": "v1",
  "published": "2020-06-21T02:38:59.000Z",
  "updated": "2020-06-21T02:38:59.000Z",
  "title": "Dense-Captioning Events in Videos: SYSU Submission to ActivityNet Challenge 2020",
  "authors": [
    "Teng Wang",
    "Huicheng Zheng",
    "Mingjing Yu"
  ],
  "comment": "technical report, 4 pages, 2 figures",
  "categories": [
    "cs.CV"
  ],
  "abstract": "This technical report presents a brief description of our submission to the dense video captioning task of ActivityNet Challenge 2020. Our approach follows a two-stage pipeline: first, we extract a set of temporal event proposals; then we propose a multi-event captioning model to capture the event-level temporal relationships and effectively fuse the multi-modal information. Our approach achieves a 9.28 METEOR score on the test set.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2020-06-21T02:38:59.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "activitynet challenge",
      "sysu submission",
      "dense-captioning events",
      "dense video captioning task",
      "event-level temporal relationships"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 4,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}