{ "id": "1711.08447", "version": "v1", "published": "2017-11-22T18:48:54.000Z", "updated": "2017-11-22T18:48:54.000Z", "title": "VITON: An Image-based Virtual Try-on Network", "authors": [ "Xintong Han", "Zuxuan Wu", "Zhe Wu", "Ruichi Yu", "Larry S. Davis" ], "categories": [ "cs.CV" ], "abstract": "We present an image-based VIirtual Try-On Network (VITON) without using 3D information in any form, which seamlessly transfers a desired clothing item onto the corresponding region of a person using a coarse-to-fine strategy. Conditioned upon a new clothing-agnostic yet descriptive person representation, our framework first generates a coarse synthesized image with the target clothing item overlaid on that same person in the same pose. We further enhance the initial blurry clothing area with a refinement network. The network is trained to learn how much detail to utilize from the target clothing item, and where to apply to the person in order to synthesize a photo-realistic image in which the target item deforms naturally with clear visual patterns. Experiments on our newly collected Zalando dataset demonstrate its promise in the image-based virtual try-on task over state-of-the-art generative models.", "revisions": [ { "version": "v1", "updated": "2017-11-22T18:48:54.000Z" } ], "analyses": { "keywords": [ "image-based virtual try-on network", "target clothing item", "image-based virtual try-on task", "newly collected zalando dataset demonstrate", "initial blurry clothing area" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }