{ "id": "1711.08447", "version": "v1", "published": "2017-11-22T18:48:54.000Z", "updated": "2017-11-22T18:48:54.000Z", "title": "VITON: An Image-based Virtual Try-on Network", "authors": [ "Xintong Han", "Zuxuan Wu", "Zhe Wu", "Ruichi Yu", "Larry S. Davis" ], "categories": [ "cs.CV" ], "abstract": "We present an image-based VIirtual Try-On Network (VITON) without using 3D information in any form, which seamlessly transfers a desired clothing item onto the corresponding region of a person using a coarse-to-fine strategy. Conditioned upon a new clothing-agnostic yet descriptive person representation, our framework first generates a coarse synthesized image with the target clothing item overlaid on that same person in the same pose. We further enhance the initial blurry clothing area with a refinement network. The network is trained to learn how much detail to utilize from the target clothing item, and where to apply to the person in order to synthesize a photo-realistic image in which the target item deforms naturally with clear visual patterns. Experiments on our newly collected Zalando dataset demonstrate its promise in the image-based virtual try-on task over state-of-the-art generative models.", "revisions": [ { "version": "v1", "updated": "2017-11-22T18:48:54.000Z" } ], "analyses": { "keywords": [ "image-based virtual try-on network", "target clothing item", "image-based virtual try-on task", "newly collected zalando dataset demonstrate", "initial blurry clothing area" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }