{ "id": "2203.17250", "version": "v1", "published": "2022-03-30T13:22:44.000Z", "updated": "2022-03-30T13:22:44.000Z", "title": "Generation and Simulation of Synthetic Datasets with Copulas", "authors": [ "Regis Houssou", "Mihai-Cezar Augustin", "Efstratios Rappos", "Vivien Bonvin", "Stephan Robert-Nicoud" ], "categories": [ "cs.LG", "cs.AI" ], "abstract": "This paper proposes a new method to generate synthetic data sets based on copula models. Our goal is to produce surrogate data resembling real data in terms of marginal and joint distributions. We present a complete and reliable algorithm for generating a synthetic data set comprising numeric or categorical variables. Applying our methodology to two datasets shows better performance compared to other methods such as SMOTE and autoencoders.", "revisions": [ { "version": "v1", "updated": "2022-03-30T13:22:44.000Z" } ], "analyses": { "keywords": [ "synthetic datasets", "data set comprising numeric", "surrogate data resembling real data", "generate synthetic data sets", "simulation" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }