{ "id": "cs/0703125", "version": "v1", "published": "2007-03-25T01:19:14.000Z", "updated": "2007-03-25T01:19:14.000Z", "title": "Intrinsic dimension of a dataset: what properties does one expect?", "authors": [ "Vladimir Pestov" ], "comment": "6 pages, 6 figures, 1 table, latex with IEEE macros, final submission to Proceedings of the 22nd IJCNN (Orlando, FL, August 12-17, 2007)", "journal": "Proceedings of the 20th International Joint Conference on Neural Networks (IJCNN'2007), Orlando, Florida (Aug. 12--17, 2007), pp. 1775--1780.", "categories": [ "cs.LG" ], "abstract": "We propose an axiomatic approach to the concept of an intrinsic dimension of a dataset, based on a viewpoint of geometry of high-dimensional structures. Our first axiom postulates that high values of dimension be indicative of the presence of the curse of dimensionality (in a certain precise mathematical sense). The second axiom requires the dimension to depend smoothly on a distance between datasets (so that the dimension of a dataset and that of an approximating principal manifold would be close to each other). The third axiom is a normalization condition: the dimension of the Euclidean $n$-sphere $\\mathbb{S}^n$ is $\\Theta(n)$. We give an example of a dimension function satisfying our axioms, even though it is in general computationally unfeasible, and discuss a computationally cheap function satisfying most but not all of our axioms (the “intrinsic dimensionality” of Chávez et al.).", "revisions": [ { "version": "v1", "updated": "2007-03-25T01:19:14.000Z" } ], "analyses": { "keywords": [ "properties", "first axiom postulates", "intrinsic dimensionality", "axiomatic approach", "high values" ], "tags": [ "journal article" ], "note": { "typesetting": "LaTeX", "pages": 6, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2007cs........3125P" } } }