2023
Falkiewicz, Maciej; Takeishi, Naoya; Shekhzadeh, Imahn; Wehenkel, Antoine; Delaunoy, Arnaud; Louppe, Gilles; Kalousis, Alexandros Calibrating Neural Simulation-Based Inference with Differentiable Coverage Probability Inproceedings Advances in Neural Information Processing Systems 36, pp. 1082–1099, 2023. @inproceedings{falkiewicz23, title = {Calibrating Neural Simulation-Based Inference with Differentiable Coverage Probability}, author = {Maciej Falkiewicz and Naoya Takeishi and Imahn Shekhzadeh and Antoine Wehenkel and Arnaud Delaunoy and Gilles Louppe and Alexandros Kalousis}, url = {https://github.com/DMML-Geneva/calibrated-posterior}, doi = {10.48550/arXiv.2310.13402}, year = {2023}, date = {2023-10-20}, booktitle = {Advances in Neural Information Processing Systems 36}, pages = {1082--1099}, abstract = {Bayesian inference allows expressing the uncertainty of posterior belief under a probabilistic model given prior information and the likelihood of the evidence. Predominantly, the likelihood function is only implicitly established by a simulator, posing the need for simulation-based inference (SBI). However, the existing algorithms can yield overconfident posteriors (Hermans et al., 2022), defeating the whole purpose of credibility if the uncertainty quantification is inaccurate. We propose to include a calibration term directly in the training objective of the neural model in selected amortized SBI techniques. By introducing a relaxation of the classical formulation of calibration error, we enable end-to-end backpropagation. The proposed method is not tied to any particular neural model and incurs moderate computational overhead relative to the benefits it brings. It is directly applicable to existing computational pipelines, allowing reliable black-box posterior inference. We empirically show on six benchmark problems that the proposed method achieves competitive or better results in terms of coverage and expected posterior density than the previously existing approaches.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
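To make the calibration term concrete: the classical coverage statistic is built from indicator functions, and replacing them with sigmoids makes the coverage error differentiable so it can be added to the usual training loss. The sketch below is an illustration of that relaxation, not the authors' implementation (their code is linked above); the tensor shapes, the soft-rank construction, and the temperature `tau` are assumptions.

```python
import torch

def soft_coverage_error(log_q_joint, log_q_samp, levels, tau=0.05):
    # log_q_joint: (B,) log q(theta_i | x_i) for pairs drawn from the joint
    # log_q_samp:  (B, S) log q(theta_s | x_i) for samples theta_s ~ q(. | x_i)
    # Soft rank statistic: fraction of posterior samples with higher density
    # than the true theta; calibration requires it to be ~Uniform[0, 1].
    u = torch.sigmoid((log_q_samp - log_q_joint.unsqueeze(1)) / tau).mean(dim=1)
    # Soft empirical coverage at each nominal credibility level:
    cov = torch.sigmoid((levels.unsqueeze(1) - u.unsqueeze(0)) / tau).mean(dim=1)
    # Penalize the gap between empirical coverage and the nominal level.
    return ((cov - levels) ** 2).mean()

# Used as: loss = nll + lam * soft_coverage_error(...); e.g. with dummy tensors:
B, S = 128, 64
err = soft_coverage_error(torch.randn(B), torch.randn(B, S),
                          torch.linspace(0.05, 0.95, 19))
print(err)
```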
2022
Takeishi, Naoya; Kalousis, Alexandros Deep Grey-Box Modeling With Adaptive Data-Driven Models Toward Trustworthy Estimation of Theory-Driven Models Unpublished 2022, (arXiv:2210.13103). @unpublished{TakeishiDeepGreyBox2022, title = {Deep Grey-Box Modeling With Adaptive Data-Driven Models Toward Trustworthy Estimation of Theory-Driven Models}, author = {Naoya Takeishi and Alexandros Kalousis}, url = {https://arxiv.org/abs/2210.13103}, year = {2022}, date = {2022-10-24}, abstract = {The combination of deep neural nets and theory-driven models, which we call deep grey-box modeling, can be inherently interpretable to some extent thanks to the theory backbone. Deep grey-box models are usually learned with regularized risk minimization to prevent the theory-driven part from being overwritten and ignored by the deep neural net. However, an estimate of the theory-driven part obtained by uncritically optimizing a regularizer can hardly be trustworthy when we are not sure which regularizer suits the given data, which may harm interpretability. Toward a trustworthy estimation of the theory-driven part, we should analyze regularizers' behavior to compare different candidates and to justify a specific choice. In this paper, we present a framework that enables us to analyze a regularizer's behavior empirically with a slight change in the neural net's architecture and the training objective.}, note = {arXiv:2210.13103}, keywords = {}, pubstate = {published}, tppubtype = {unpublished} }
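The grey-box setup itself is easy to picture: a theory model with physical parameters, a neural residual on top, and a regularizer that keeps the residual from taking over. A minimal sketch follows; the exponential decay law, network sizes, and `lam` are hypothetical stand-ins, not the paper's models.

```python
import torch
import torch.nn as nn

class GreyBox(nn.Module):
    """Theory-driven backbone plus a data-driven correction."""
    def __init__(self):
        super().__init__()
        self.k = nn.Parameter(torch.tensor(0.5))   # physical parameter of the theory part
        self.g = nn.Sequential(nn.Linear(1, 32), nn.Tanh(), nn.Linear(32, 1))
    def forward(self, x):
        y_theory = torch.exp(-self.k * x)          # theory-driven part (toy decay law)
        y_nn = self.g(x)                           # data-driven residual
        return y_theory + y_nn, y_nn

def grey_box_loss(model, x, y, lam=1e-2):
    y_hat, y_nn = model(x)
    # The regularizer penalizes the NN part so it cannot overwrite the theory;
    # the paper's point is precisely that this choice must be analyzed, not assumed.
    return ((y_hat - y) ** 2).mean() + lam * (y_nn ** 2).mean()

x = torch.linspace(0., 3., 100).unsqueeze(1)
y = torch.exp(-0.8 * x) + 0.05 * torch.sin(5 * x)  # synthetic observations
print(grey_box_loss(GreyBox(), x, y))
```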
Anagnostopoulos, Grigorios G; Kalousis, Alexandros Can I Trust This Location Estimate? Reproducibly Benchmarking the Methods of Dynamic Accuracy Estimation of Localization Journal Article Sensors MDPI, 22 (3), pp. 39, 2022, ISSN: 1424-8220. @article{s22031088, title = {Can I Trust This Location Estimate? Reproducibly Benchmarking the Methods of Dynamic Accuracy Estimation of Localization}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://www.mdpi.com/1424-8220/22/3/1088 https://zenodo.org/record/5589651#.YfpXUS1Q0_U}, doi = {10.3390/s22031088}, issn = {1424-8220}, year = {2022}, date = {2022-01-30}, journal = {Sensors MDPI}, volume = {22}, number = {3}, pages = {39}, abstract = {Despite the great attention that the research community has paid to the creation of novel indoor positioning methods, a rather limited volume of works has focused on the confidence that Indoor Positioning Systems (IPS) assign to the position estimates that they produce. The concept of dynamically estimating the accuracy of the position estimates provided by an IPS has been sporadically studied in the literature of the field. Recently, this concept has also started being studied in the context of outdoor positioning systems for the Internet of Things (IoT) based on Low-Power Wide-Area Networks (LPWANs). What is problematic is that consistent comparison of the proposed methods is quasi-nonexistent: new methods rarely use previous ones as baselines; only a small number of evaluation metrics is typically reported, and different publications report different metrics; the use of open data is rare; and the publication of open code is absent. In this work, we present an open-source, reproducible benchmarking framework for evaluating and consistently comparing various methods of Dynamic Accuracy Estimation (DAE). This work reviews the relevant literature, presenting commonalities and differences in a consistent terminology and discussing baselines and evaluation metrics. Moreover, it evaluates multiple methods of DAE using open data, open code, and a rich set of relevant evaluation metrics. This is the first work aiming to establish the state of the art of DAE methods in IPS and in LPWAN positioning systems, through an open, transparent, holistic, reproducible, and consistent evaluation of the methods proposed in the relevant literature.}, keywords = {}, pubstate = {published}, tppubtype = {article} }
2021
Takeishi, Naoya; Kalousis, Alexandros Variational Autoencoder with Differentiable Physics Engine for Human Gait Analysis and Synthesis Workshop Deep Generative Models and Downstream Applications Workshop, 2021. @workshop{Takeishi2021b, title = {Variational Autoencoder with Differentiable Physics Engine for Human Gait Analysis and Synthesis}, author = {Naoya Takeishi and Alexandros Kalousis}, url = {https://openreview.net/forum?id=9ISlKio3Bt}, year = {2021}, date = {2021-12-14}, booktitle = {Deep Generative Models and Downstream Applications Workshop}, abstract = {We address the task of learning generative models of human gait. As gait motion always follows physical laws, a generative model should produce outputs that comply with those laws, particularly rigid-body dynamics with contact and friction. We propose a deep generative model combined with a differentiable physics engine, which outputs physically plausible signals by construction. The proposed model is also equipped with a policy network conditioned on each sample. We show an example of the application of such a model to style transfer of gait.}, keywords = {}, pubstate = {published}, tppubtype = {workshop} }
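The decoder structure described above can be sketched as a latent-conditioned policy driving a differentiable simulator. Below is a minimal illustration under stated assumptions: `physics_step` is a toy stand-in for a real differentiable physics engine (rigid-body dynamics with contact and friction), and all sizes are illustrative.

```python
import torch
import torch.nn as nn

def physics_step(state, torque, dt=0.01):
    # Stand-in for one step of a differentiable physics engine; real rigid-body
    # dynamics with contact and friction would replace this toy update.
    return state + dt * torque

class GaitDecoder(nn.Module):
    """Decoder sketch: a policy network conditioned on the latent code drives the
    simulator, so every generated trajectory is physically plausible by construction."""
    def __init__(self, state_dim=6, z_dim=8):
        super().__init__()
        self.policy = nn.Sequential(nn.Linear(state_dim + z_dim, 64), nn.Tanh(),
                                    nn.Linear(64, state_dim))
    def forward(self, z, state0, steps=100):
        traj, state = [], state0
        for _ in range(steps):
            torque = self.policy(torch.cat([state, z], dim=-1))
            state = physics_step(state, torque)
            traj.append(state)
        return torch.stack(traj, dim=1)            # (batch, steps, state_dim)

traj = GaitDecoder()(torch.randn(4, 8), torch.zeros(4, 6))
```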
Takeishi, Naoya; Kalousis, Alexandros Physics-Integrated Variational Autoencoders for Robust and Interpretable Generative Modeling Inproceedings Advances in Neural Information Processing Systems 34, 2021. @inproceedings{Takeishi2021a, title = {Physics-Integrated Variational Autoencoders for Robust and Interpretable Generative Modeling}, author = {Naoya Takeishi and Alexandros Kalousis}, url = {https://openreview.net/forum?id=0p0gt1Pn2Gv https://github.com/n-takeishi/phys-vae}, year = {2021}, date = {2021-12-07}, booktitle = {Advances in Neural Information Processing Systems 34}, abstract = {Integrating physics models within machine learning models holds considerable promise toward learning robust models with improved interpretability and abilities to extrapolate. In this work, we focus on the integration of incomplete physics models into deep generative models. In particular, we introduce an architecture of variational autoencoders (VAEs) in which a part of the latent space is grounded by physics. A key technical challenge is to strike a balance between the incomplete physics and trainable components such as neural networks for ensuring that the physics part is used in a meaningful manner. To this end, we propose a regularized learning method that controls the effect of the trainable components and preserves the semantics of the physics-based latent variables as intended. We not only demonstrate generative performance improvements over a set of synthetic and real-world datasets, but we also show that we learn robust models that can consistently extrapolate beyond the training distribution in a meaningful manner. Moreover, we show that we can control the generative process in an interpretable manner.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
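The split latent space is the core architectural idea: part of the code feeds an incomplete physics decoder, the rest feeds a trainable correction. The sketch below illustrates this under assumptions (a toy sinusoid as the "incomplete physics", illustrative dimensions); the authors' actual models are in the linked phys-vae repository.

```python
import torch
import torch.nn as nn

class PhysVAE(nn.Module):
    """VAE whose latent space is split into a physics-grounded coordinate and
    auxiliary NN coordinates; the decoder adds a trainable correction on top of
    an incomplete physics decoder."""
    def __init__(self, x_dim=50, z_aux=4):
        super().__init__()
        self.enc = nn.Linear(x_dim, 2 * (1 + z_aux))
        self.correct = nn.Linear(1 + z_aux, x_dim)
        self.register_buffer("t", torch.linspace(0., 1., x_dim))
    def decode_physics(self, z_phys):
        return torch.sin(2 * torch.pi * z_phys * self.t)   # toy incomplete physics
    def forward(self, x):
        mu, logvar = self.enc(x).chunk(2, dim=-1)
        z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()
        x_phys = self.decode_physics(z[:, :1])             # physics-only reconstruction
        x_full = x_phys + self.correct(z)                  # physics + NN correction
        return x_full, x_phys, mu, logvar

# Regularized objective (sketch): ELBO on x_full plus a term asking x_phys to
# explain the data on its own, so the correction cannot take over the physics.
out = PhysVAE()(torch.randn(8, 50))
```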
Anagnostopoulos, Grigorios G; Kalousis, Alexandros Towards Reproducible Indoor Positioning Research Inproceedings IPIN 2021 conference, 2021. @inproceedings{2021_Reproducible_Anagnostopoulos, title = {Towards Reproducible Indoor Positioning Research}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://zenodo.org/record/5760345#.YbhYE9so-V4}, year = {2021}, date = {2021-11-29}, booktitle = {IPIN 2021 conference}, abstract = {The movement advocating for a more transparent and reproducible science has placed the issue of research reproducibility at the center of attention of various stakeholders in academic research. Universities, funding institutions and publishers have started changing long-established policies to encourage and support best practices for rigorous and transparent science. Regarding the field of indoor positioning, there is a lack of standard evaluation procedures that would enable consistent comparisons. Moreover, the practices of Open Data and Open Source are on the verge of gaining popularity within the community of the field. This work, after providing an introduction to the landscape of research reproducibility from the viewpoint of the research community of Indoor Positioning, proceeds to its primary contribution: a concrete set of suggestions that could accelerate the pace of the Indoor Positioning research community towards becoming a discipline of reproducible research.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
Anagnostopoulos, Grigorios G; Kalousis, Alexandros Proximity-Based Fingerprint Augmentation Inproceedings IPIN 2021 conference, 2021. @inproceedings{2021_ProxyFAUG_Anagnostopoulos, title = {Proximity-Based Fingerprint Augmentation}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://zenodo.org/record/4457353#.YbhXGtso-V4 https://zenodo.org/record/4457391#.YbhXIdso-V4 https://arxiv.org/abs/2102.02706}, doi = {10.1109/IPIN51156.2021.9662590}, year = {2021}, date = {2021-11-29}, booktitle = {IPIN 2021 conference}, abstract = {The proliferation of data-demanding machine learning methods has brought to light the need for methodologies that can enlarge the size of training datasets with simple, rule-based methods. In line with this concept, the fingerprint augmentation scheme proposed in this work aims to augment the fingerprint datasets used to train positioning models. The proposed method utilizes fingerprints recorded in spatial proximity in order to perform fingerprint augmentation, creating new fingerprints that combine the features of the original ones. The way the new, augmented fingerprints are composed is inspired by the crossover and mutation operators of genetic algorithms. The ProxyFAUG method aims to improve the achievable positioning accuracy of fingerprint datasets by introducing a rule-based, stochastic, proximity-based method of fingerprint augmentation. The performance of ProxyFAUG is evaluated in an outdoor Sigfox setting using a public dataset. The best performing published positioning method on this dataset is improved by 40% in terms of median error and 6% in terms of mean error with the use of the augmented dataset. The analysis of the results indicates a systematic and significant performance improvement at the lower error quartiles, as indicated by the notable improvement of the median error.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
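The genetic-operator analogy translates almost directly into code: uniform crossover between two nearby fingerprints, then a small random mutation. The snippet below is a sketch of that idea; the mutation probability, noise scale, and example RSSI values are illustrative, not the paper's settings.

```python
import numpy as np

def proxy_augment(fp_a, fp_b, rng, mut_prob=0.1, mut_scale=2.0):
    """Combine two RSSI fingerprints recorded in spatial proximity into a new
    synthetic one: uniform crossover of features plus Gaussian mutation."""
    mask = rng.random(fp_a.shape) < 0.5          # crossover: pick each feature from a or b
    child = np.where(mask, fp_a, fp_b).astype(float)
    mutate = rng.random(fp_a.shape) < mut_prob   # mutation: perturb a few features
    child[mutate] += rng.normal(0.0, mut_scale, size=mutate.sum())
    return child

rng = np.random.default_rng(0)
a = np.array([-95.0, -110.0, -120.0, -87.0])     # RSSI values (dBm) of two nearby fingerprints
b = np.array([-97.0, -108.0, -118.0, -90.0])
print(proxy_augment(a, b, rng))
```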
Anagnostopoulos, Grigorios G; Kalousis, Alexandros Analysing the Data-Driven Approach of Dynamically Estimating Positioning Accuracy Inproceedings IEEE ICC 2021 - IoT and Sensor Networks Symposium, 2021. @inproceedings{data_driven_DAE_anagnostopoulos_2020, title = {Analysing the Data-Driven Approach of Dynamically Estimating Positioning Accuracy}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://arxiv.org/abs/2011.10478 https://ieeexplore.ieee.org/document/9500369 https://zenodo.org/record/4099079#.YfpWpC1Q1QI https://zenodo.org/record/4117818#.YfpWqy1Q1QI}, doi = {10.1109/ICC42927.2021.9500369}, year = {2021}, date = {2021-06-14}, booktitle = {IEEE ICC 2021 - IoT and Sensor Networks Symposium}, abstract = {The primary expectation from positioning systems is for them to provide users with reliable estimates of their position. An additional piece of information that can greatly help users utilize position estimates is the level of uncertainty that a positioning system assigns to each position estimate it produces. The concept of dynamically estimating the accuracy of the position estimates of fingerprinting positioning systems has been sporadically discussed over the last decade in the literature of the field, where mainly handcrafted rules based on domain knowledge have been proposed. The emergence of IoT devices and the proliferation of data from Low Power Wide Area Networks (LPWANs) have facilitated the conceptualization of data-driven methods of determining the estimated certainty over position estimates. In this work, we analyze the data-driven approach of determining the Dynamic Accuracy Estimation (DAE), considering it in the broader context of a positioning system. More specifically, using a public LoRaWAN dataset, the current work analyses: the repartition of the available training set between the tasks of determining the location estimates and the DAE; the concept of selecting a subset of the most reliable estimates; and the impact that the spatial distribution of the data has on the accuracy of the DAE. The work provides a broad overview of the data-driven approach to DAE determination in the context of the overall design of a positioning system.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
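The training-set repartition analyzed above can be illustrated in a few lines: one split trains the position model, the other provides the held-out positioning errors on which the DAE regressor is trained. The data, models, and split below are illustrative assumptions, not the paper's pipeline.

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

# Hypothetical arrays: RSSI fingerprints X and ground-truth positions Y (meters).
rng = np.random.default_rng(0)
X, Y = rng.normal(-100, 10, (1200, 20)), rng.uniform(0, 500, (1200, 2))
X_pos, X_dae, Y_pos, Y_dae = X[:800], X[800:], Y[:800], Y[800:]

pos_model = KNeighborsRegressor(n_neighbors=5).fit(X_pos, Y_pos)

# DAE target: the positioning error the model actually makes on held-out data.
err = np.linalg.norm(pos_model.predict(X_dae) - Y_dae, axis=1)
dae_model = RandomForestRegressor(n_estimators=100, random_state=0).fit(X_dae, err)

# At inference time: report the position estimate together with its predicted accuracy.
x_new = X[:1]
print(pos_model.predict(x_new), dae_model.predict(x_new))
```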
Gregorova, Magda; Desaules, Marc; Kalousis, Alexandros Learned transform compression with optimized entropy encoding Inproceedings Neural Compression: From Information Theory to Applications--Workshop@ ICLR 2021, 2021. @inproceedings{gregorova2021learned, title = {Learned transform compression with optimized entropy encoding}, author = {Magda Gregorova and Marc Desaules and Alexandros Kalousis}, url = {https://openreview.net/pdf?id=SmV8N_RbB_}, year = {2021}, date = {2021-01-01}, booktitle = {Neural Compression: From Information Theory to Applications--Workshop@ ICLR 2021}, abstract = {We consider the problem of learned transform compression, where we learn both the transform and the probability distribution over the discrete codes. We utilize a soft relaxation of the quantization operation to allow for back-propagation of gradients and employ vector (rather than scalar) quantization of the latent codes. Furthermore, we apply a similar relaxation in the code probability assignments, enabling direct optimization of the code entropy. To the best of our knowledge, this approach is completely novel. We conduct a set of proof-of-concept experiments confirming the potency of our approach.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
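A compact way to see both relaxations is a softmax over distances to a codebook: the soft assignments give differentiable vector quantization, and their batch average gives a differentiable estimate of code entropy. This is a sketch of the general technique, assuming illustrative dimensions and temperature, not the workshop paper's exact formulation.

```python
import torch
import torch.nn.functional as F

def soft_vq(z, codebook, tau=1.0):
    """Soft relaxation of vector quantization plus a differentiable entropy proxy."""
    d = torch.cdist(z, codebook)                        # (B, K) distances to codewords
    p = F.softmax(-d / tau, dim=-1)                     # soft code assignments
    z_q = p @ codebook                                  # soft-quantized latents
    p_code = p.mean(dim=0)                              # marginal code-usage distribution
    entropy = -(p_code * (p_code + 1e-9).log()).sum()   # differentiable rate proxy
    return z_q, entropy

z = torch.randn(8, 16)
codebook = torch.randn(64, 16, requires_grad=True)
z_q, H = soft_vq(z, codebook)
# Rate-distortion-style loss (sketch): distortion(z, z_q) + beta * H
loss = ((z - z_q) ** 2).mean() + 0.1 * H
loss.backward()
```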
2020
Mollaysa, Amina; Paige, Brooks; Kalousis, Alexandros Goal-directed Generation of Discrete Structures with Conditional Generative Models Inproceedings Proceedings of the 34th Conference on Neural Information Processing Systems, NeurIPS, 2020. @inproceedings{https://dblp.org/rec/conf/nips/MollaysaPK20, title = {Goal-directed Generation of Discrete Structures with Conditional Generative Models}, author = {Amina Mollaysa and Brooks Paige and Alexandros Kalousis}, url = {https://papers.nips.cc/paper/2020/file/f9b9f0fef2274a6b7009b5d52f44a3b6-Paper.pdf https://arxiv.org/abs/2010.02311 https://github.com/amina11/Goal-directed-Generation-of-Discrete-Structures}, year = {2020}, date = {2020-12-05}, booktitle = {Proceedings of the 34th Conference on Neural Information Processing Systems, NeurIPS}, abstract = {Despite recent advances, goal-directed generation of structured discrete data remains challenging. For problems such as program synthesis (generating source code) and materials design (generating molecules), finding examples that satisfy desired constraints or exhibit desired properties is difficult. In practice, expensive heuristic search or reinforcement learning algorithms are often employed. In this paper we investigate the use of conditional generative models that directly attack this inverse problem, by modeling the distribution of discrete structures given properties of interest. Unfortunately, maximum likelihood training of such models often fails, with the samples from the generative model inadequately respecting the input properties. To address this, we introduce a novel approach to directly optimize a reinforcement learning objective, maximizing an expected reward. We avoid high-variance score-function estimators that would otherwise be required by sampling from an approximation to the normalized rewards, allowing simple Monte Carlo estimation of model gradients. We test our methodology on two tasks: generating molecules with user-defined properties and identifying short Python expressions that evaluate to a given target value. In both cases, we find improvements over maximum likelihood estimation and other baselines.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} }
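One common surrogate in the spirit of this expected-reward objective is reward-weighted maximum likelihood: sample from the model, self-normalize the rewards, and maximize the weighted log-likelihood, avoiding the score-function estimator. The toy categorical "generator" below illustrates the estimator's shape only; it is not the paper's estimator or model (their code is linked above).

```python
import torch

# Toy setup: a categorical distribution over V discrete structures stands in
# for a sequence model; the reward prefers some target structures.
V = 10
logits = torch.zeros(V, requires_grad=True)
reward = torch.rand(V)                       # hypothetical per-structure rewards

opt = torch.optim.Adam([logits], lr=0.1)
for _ in range(200):
    logp = torch.log_softmax(logits, dim=0)
    idx = torch.multinomial(logp.exp(), 64, replacement=True)  # x ~ p_model
    w = reward[idx]
    w = w / w.sum()                          # self-normalized reward weights
    loss = -(w * logp[idx]).sum()            # reward-weighted ML surrogate
    opt.zero_grad()
    loss.backward()
    opt.step()

# Mass should concentrate on high-reward structures:
print(int(torch.argmax(logits)), int(torch.argmax(reward)))
```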
Mollaysa, Amina; Paige, Brooks; Kalousis, Alexandros Conditional generation of molecules from disentangled representations Workshop Machine Learning for Molecules Workshop NeurIPS, 2020. @workshop{A.Molaysa2020, title = {Conditional generation of molecules from disentangled representations}, author = {Amina Mollaysa and Brooks Paige and Alexandros Kalousis}, url = {https://ml4molecules.github.io/papers2020/ML4Molecules_2020_paper_52.pdf}, year = {2020}, date = {2020-12-05}, booktitle = {Machine Learning for Molecules Workshop NeurIPS}, abstract = {Though machine learning approaches have shown great success in estimating properties of small molecules, the inverse problem of generating molecules with desired properties remains challenging. This difficulty is in part because the set of molecules that have a given property is structurally very diverse. Treating this inverse problem as a generative modeling task, we draw upon work in disentangled representations to learn a variational autoencoder (VAE) that includes latent variables representing both property information and molecular structure. By explicitly setting the property to a desired value at generation time, we can perform conditional molecule generation via a “style transfer” process. We improve the disentangling behaviour relative to baseline VAE models by introducing a novel regularization term that constrains the generated molecules to have the property provided to the generation network, no matter how the latent factor changes.}, keywords = {}, pubstate = {published}, tppubtype = {workshop} }
Lavda, Frantzeska; Gregorova, Magda; Kalousis, Alexandros Data-Dependent Conditional Priors for Unsupervised Learning of Multimodal Data Journal Article Entropy, 22 (8), pp. 888, 2020. @article{Lavda2020entropy, title = {Data-Dependent Conditional Priors for Unsupervised Learning of Multimodal Data}, author = {Frantzeska Lavda and Magda Gregorova and Alexandros Kalousis}, url = {https://www.mdpi.com/1099-4300/22/8/888 https://bitbucket.org/dmmlgeneva/cp-vae/src/master/}, doi = {https://doi.org/10.3390/e22080888}, year = {2020}, date = {2020-08-13}, journal = {Entropy}, volume = {22}, number = {8}, pages = {888}, abstract = {One of the major shortcomings of variational autoencoders is the inability to produce generations from the individual modalities of data originating from mixture distributions. This is primarily due to the use of a simple isotropic Gaussian as the prior for the latent code in the ancestral sampling procedure for data generations. In this paper, we propose a novel formulation of variational autoencoders, the conditional prior VAE (CP-VAE), with a two-level generative process for the observed data in which a continuous variable z and a discrete variable c are introduced in addition to the observed variables x. By learning data-dependent conditional priors, the new variational objective naturally encourages a better match between the posterior and prior conditionals, and the learning of latent categories encoding the major source of variation of the original data in an unsupervised manner. By sampling the continuous latent code from the data-dependent conditional priors, we are able to generate new samples from the individual mixture components corresponding to the multimodal structure of the original data. Moreover, we unify and analyse our objective under different independence assumptions for the joint distribution of the continuous and discrete latent variables. We provide an empirical evaluation on one synthetic dataset and three image datasets, FashionMNIST, MNIST, and Omniglot, illustrating the generative performance of our new model compared to multiple baselines.}, keywords = {}, pubstate = {published}, tppubtype = {article} }
Lavda, Frantzeska; Gregorová, Magda; Kalousis, Alexandros Improving VAE generations of multimodal data through data-dependent conditional priors Conference 24th European Conference on Artificial Intelligence, 325 , IOS Press, 2020. @conference{Lavda2020ecai, title = {Improving VAE generations of multimodal data through data-dependent conditional priors}, author = {Frantzeska Lavda and Magda Gregorová and Alexandros Kalousis}, url = {http://ebooks.iospress.nl/volumearticle/55021 https://bitbucket.org/dmmlgeneva/cp-vae/src/master/}, doi = {10.3233/FAIA200226}, year = {2020}, date = {2020-08-01}, booktitle = {24th European Conference on Artificial Intelligence}, journal = {IOS press}, volume = {325}, pages = {1254-1261}, publisher = {IOS Press}, abstract = {One of the major shortcomings of variational autoencoders is the inability to produce generations from the individual modalities of data originating from mixture distributions. This is primarily due to the use of a simple isotropic Gaussian as the prior for the latent code in the ancestral sampling procedure for data generations. We propose a novel formulation of variational autoencoders, the conditional prior VAE (CP-VAE), which learns to differentiate between the individual mixture components and therefore allows for generations from the distributional data clusters. We assume a two-level generative process with a continuous (Gaussian) latent variable sampled conditionally on a discrete (categorical) latent component. The new variational objective naturally couples the learning of the posterior and prior conditionals, and the learning of the latent categories encoding the multimodality of the original data in an unsupervised manner. The data-dependent conditional priors are then used to sample the continuous latent code when generating new samples from the individual mixture components corresponding to the multimodal structure of the original data. Our experimental results illustrate the generative performance of our new model compared to multiple baselines.}, keywords = {}, pubstate = {published}, tppubtype = {conference} }
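The two-level generative process p(c) p(z|c) p(x|z) shared by the two CP-VAE entries above is easy to sketch: a categorical component selects a learned Gaussian prior over z, so each mixture component of the data gets its own region of latent space. Dimensions and architecture below are illustrative, not the published models (see the linked repository).

```python
import torch
import torch.nn as nn

class CPVAE(nn.Module):
    """Generative side of a conditional-prior VAE (sketch)."""
    def __init__(self, x_dim=784, z_dim=16, n_comp=10):
        super().__init__()
        self.prior_mu = nn.Parameter(torch.randn(n_comp, z_dim))      # data-dependent
        self.prior_logvar = nn.Parameter(torch.zeros(n_comp, z_dim))  # conditional priors
        self.dec = nn.Sequential(nn.Linear(z_dim, 256), nn.ReLU(), nn.Linear(256, x_dim))
    def generate(self, n, c=None):
        # Sample a component c, then z from its conditional prior, then decode x.
        if c is None:
            c = torch.randint(self.prior_mu.size(0), (n,))
        mu, logvar = self.prior_mu[c], self.prior_logvar[c]
        z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()
        return torch.sigmoid(self.dec(z))

samples = CPVAE().generate(8, c=torch.full((8,), 3))   # generations from component 3
```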
Alayrac, Jean-Baptiste; Recasens, Adrià; Schneider, Rosalia; Arandjelović, Relja; Ramapuram, Jason; Fauw, Jeffrey De; Smaira, Lucas; Dieleman, Sander; Zisserman, Andrew Self-Supervised MultiModal Versatile Networks Unpublished 2020. @unpublished{Alayrac2020, title = {Self-Supervised MultiModal Versatile Networks}, author = {Jean-Baptiste Alayrac and Adrià Recasens and Rosalia Schneider and Relja Arandjelović and Jason Ramapuram and Jeffrey De Fauw and Lucas Smaira and Sander Dieleman and Andrew Zisserman}, url = {https://arxiv.org/abs/2006.16228}, year = {2020}, date = {2020-06-29}, abstract = {Videos are a rich source of multi-modal supervision. In this work, we learn representations using self-supervision by leveraging three modalities naturally present in videos: vision, audio and language. To this end, we introduce the notion of a multimodal versatile network -- a network that can ingest multiple modalities and whose representations enable downstream tasks in multiple modalities. In particular, we explore how best to combine the modalities, such that fine-grained representations of audio and vision can be maintained, whilst also integrating text into a common embedding. Driven by versatility, we also introduce a novel process of deflation, so that the networks can be effortlessly applied to the visual data in the form of video or a static image. We demonstrate how such networks trained on large collections of unlabelled video data can be applied on video, video-text, image and audio tasks. Equipped with these representations, we obtain state-of-the-art performance on multiple challenging benchmarks including UCF101, HMDB51 and ESC-50 when compared to previous self-supervised work.}, keywords = {}, pubstate = {published}, tppubtype = {unpublished} }
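The joint-embedding idea in the abstract can be illustrated with per-modality encoders trained so that paired clips agree under a simple batch-contrastive (NCE-style) loss. This is a toy illustration of the general setup, not the paper's architecture or its specific embedding graph; feature dimensions and the loss are assumptions.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class VersatileEmbedder(nn.Module):
    """Toy projection heads mapping video, audio and text features into a common space."""
    def __init__(self, dv=512, da=128, dt=300, d=256):
        super().__init__()
        self.fv, self.fa, self.ft = nn.Linear(dv, d), nn.Linear(da, d), nn.Linear(dt, d)
    def forward(self, v, a, t):
        return (F.normalize(self.fv(v), dim=-1),
                F.normalize(self.fa(a), dim=-1),
                F.normalize(self.ft(t), dim=-1))

def nce(x, y, temp=0.07):
    # Paired (x_i, y_i) are positives; all other pairs in the batch are negatives.
    logits = x @ y.t() / temp
    target = torch.arange(x.size(0))
    return F.cross_entropy(logits, target)

model = VersatileEmbedder()
v, a, t = torch.randn(8, 512), torch.randn(8, 128), torch.randn(8, 300)
zv, za, zt = model(v, a, t)
loss = nce(zv, za) + nce(zv, zt)     # video-audio and video-text agreement
```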
Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros Lifelong generative modeling Journal Article Neurocomputing, 404 , pp. 381 - 400, 2020, ISSN: 0925-2312, (Code: https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling). @article{RAMAPURAM2020381, title = {Lifelong generative modeling}, author = {Jason Ramapuram and Magda Gregorova and Alexandros Kalousis}, url = {http://www.sciencedirect.com/science/article/pii/S0925231220303623 https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling}, doi = {https://doi.org/10.1016/j.neucom.2020.02.115}, issn = {0925-2312}, year = {2020}, date = {2020-01-01}, journal = {Neurocomputing}, volume = {404}, pages = {381 - 400}, abstract = {Lifelong learning is the problem of learning multiple consecutive tasks in a sequential manner, where knowledge gained from previous tasks is retained and used to aid future learning over the lifetime of the learner. It is essential to the development of intelligent machines that can adapt to their surroundings. In this work we focus on a lifelong learning approach to unsupervised generative modeling, where we continuously incorporate newly observed distributions into a learned model. We do so through a student-teacher Variational Autoencoder architecture which allows us to learn and preserve all the distributions seen so far, without the need to retain the past data nor the past models. Through the introduction of a novel cross-model regularizer, inspired by a Bayesian update rule, the student model leverages the information learned by the teacher, which acts as a probabilistic knowledge store. The regularizer reduces the effect of catastrophic interference that appears when we learn over sequences of distributions. We validate our model’s performance on sequential variants of MNIST, FashionMNIST, PermutedMNIST, SVHN and Celeb-A and demonstrate that our model mitigates the effects of catastrophic interference faced by neural networks in sequential learning scenarios.}, note = {Code: https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling}, keywords = {}, pubstate = {published}, tppubtype = {article} }
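The cross-model regularizer can be read as a KL term pulling the student's posterior toward the frozen teacher's posterior on teacher-generated replay data. Below is a minimal sketch of that ingredient, assuming diagonal-Gaussian posteriors; the `student`/`teacher` interface in the comment is hypothetical, and the released code (linked above) is the authoritative implementation.

```python
import torch

def gaussian_kl(mu_q, logvar_q, mu_p, logvar_p):
    """KL(q || p) between diagonal Gaussians; the cross-model regularizer pulls
    the student posterior q toward the frozen teacher posterior p."""
    return 0.5 * (logvar_p - logvar_q
                  + (logvar_q.exp() + (mu_q - mu_p) ** 2) / logvar_p.exp()
                  - 1.0).sum(dim=-1).mean()

# Sketch of one lifelong step (hypothetical student/teacher VAE API):
#   x_replay = teacher.decode(teacher.sample_prior(n))      # teacher as knowledge store
#   loss = -student.elbo(x_new) - student.elbo(x_replay) \
#          + beta * gaussian_kl(*student.posterior(x_replay),
#                               *teacher.posterior(x_replay))
print(gaussian_kl(torch.zeros(4, 8), torch.zeros(4, 8),
                  torch.ones(4, 8), torch.zeros(4, 8)))
```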
2019
Mollaysa, Amina; Kalousis, Alexandros; Bruno, Eric; Diephuis, Maurits Learning to Augment with Feature Side-information Conference Proceedings of The Eleventh Asian Conference on Machine Learning, 2019. @conference{pmlr-v101-mollaysa19a, title = {Learning to Augment with Feature Side-information}, author = {Amina Mollaysa and Alexandros Kalousis and Eric Bruno and Maurits Diephuis}, url = {http://proceedings.mlr.press/v101/mollaysa19a.html}, year = {2019}, date = {2019-11-19}, booktitle = {Proceedings of The Eleventh Asian Conference on Machine Learning}, abstract = {Neural networks typically need huge amounts of data to train in order to get reasonably generalizable results. A common approach is to artificially generate samples by using prior knowledge of the data properties or other relevant domain knowledge. However, if the assumptions on the data properties are not accurate or the domain knowledge is irrelevant to the task at hand, one may end up degrading learning performance by using such augmented data, in comparison to simply training on the limited available dataset. We propose a critical data augmentation method using feature side-information, which is obtained from domain knowledge and provides detailed information about features' intrinsic properties. Most importantly, we introduce an instance-wise quality-checking procedure on the augmented data, which filters out irrelevant or harmful augmented data before it enters the model. We validated this approach on both synthetic and real-world datasets, specifically in a scenario where the data augmentation is done based on a task-independent, unreliable source of information. The experiments show that the introduced critical data augmentation scheme helps avoid the performance degradation that results from incorporating wrong augmented data.}, keywords = {}, pubstate = {published}, tppubtype = {conference} }
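An instance-wise quality check of the kind described above can be sketched as a filter that compares the model's loss on each augmented sample against the loss on the real sample it was derived from. The specific criterion, tolerance, and `model_loss` interface below are guesses for illustration, not the paper's procedure.

```python
import numpy as np

def filter_augmented(model_loss, x_aug, y_aug, x_ref, y_ref, tol=1.2):
    """Keep an augmented sample only if the current model's per-instance loss on
    it is not much worse than on the real sample it came from -- a proxy for
    'does not contradict the task'. model_loss maps (X, y) to per-instance losses."""
    keep = model_loss(x_aug, y_aug) <= tol * model_loss(x_ref, y_ref)
    return x_aug[keep], y_aug[keep]

# Illustrative usage with a fixed linear model w:
rng = np.random.default_rng(0)
w = rng.normal(size=5)
model_loss = lambda X, y: (X @ w - y) ** 2
X_ref = rng.normal(size=(100, 5)); y_ref = X_ref @ w
X_aug = X_ref + rng.normal(scale=0.5, size=X_ref.shape)
print(filter_augmented(model_loss, X_aug, y_ref, X_ref, y_ref)[0].shape)
```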
Anagnostopoulos, Grigorios G; Kalousis, Alexandros A Reproducible Comparison of RSSI Fingerprinting Localization Methods Using LoRaWAN Conference 2019, ISBN: 978-1-7281-2082-9, (WPNC 2019, Preprint available: https://arxiv.org/abs/1908.05085). @conference{Anagnostopoulos2019LoRa, title = {A Reproducible Comparison of RSSI Fingerprinting Localization Methods Using LoRaWAN}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://ieeexplore.ieee.org/document/8970177}, doi = {https://doi.org/10.1109/WPNC47567.2019.8970177}, isbn = {978-1-7281-2082-9}, year = {2019}, date = {2019-10-23}, abstract = {The use of fingerprinting localization techniques in outdoor IoT settings has been gaining popularity in recent years. Communication signals of Low Power Wide Area Networks (LPWAN), such as LoRaWAN, are used to estimate the location of low-power mobile devices. In this study, a publicly available dataset of LoRaWAN RSSI measurements is utilized to compare different machine learning methods and their accuracy in producing location estimates. The tested methods are: the k Nearest Neighbours method, the Extra Trees method, and a neural network approach using a Multilayer Perceptron. To facilitate the reproducibility of tests and the comparability of results, the code and the train/validation/test split of the dataset used in this study have been made available. The neural network approach was the method with the highest accuracy, achieving a mean error of 357 meters and a median error of 206 meters.}, note = {WPNC 2019, Preprint available: https://arxiv.org/abs/1908.05085}, keywords = {}, pubstate = {published}, tppubtype = {conference} }
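The simplest of the compared baselines, kNN fingerprinting, fits in a dozen lines of scikit-learn. The synthetic data, gateway count, and hyperparameters below are placeholders, not the paper's dataset or tuned settings (see the links above for those).

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split

# Hypothetical stand-in for a LoRaWAN dataset: RSSI per gateway -> 2-D position.
rng = np.random.default_rng(0)
X = rng.uniform(-120, -60, (5000, 68))        # RSSI in dBm over 68 gateways
Y = rng.uniform(0, 1, (5000, 2))              # positions in normalized coordinates
X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y, test_size=0.2, random_state=0)

knn = KNeighborsRegressor(n_neighbors=15, weights="distance").fit(X_tr, Y_tr)
err = np.linalg.norm(knn.predict(X_te) - Y_te, axis=1)
print(f"mean error: {err.mean():.3f}, median error: {np.median(err):.3f}")
```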
Anagnostopoulos, Grigorios G; Kalousis, Alexandros A Reproducible Analysis of RSSI Fingerprinting for Outdoor Localization Using Sigfox: Preprocessing and Hyperparameter Tuning Conference 2019, ISBN: 978-1-7281-1788-1, (IPIN 2019, Preprint available: https://arxiv.org/abs/1908.06851). @conference{Anagnostopoulos2019IPIN, title = {A Reproducible Analysis of RSSI Fingerprinting for Outdoor Localization Using Sigfox: Preprocessing and Hyperparameter Tuning}, author = {Grigorios G. Anagnostopoulos and Alexandros Kalousis}, url = {https://ieeexplore.ieee.org/document/8911792}, doi = {10.1109/IPIN.2019.8911792}, isbn = {978-1-7281-1788-1}, year = {2019}, date = {2019-09-30}, abstract = {Fingerprinting techniques, which are a common method for indoor localization, have recently been applied with success in outdoor settings. In particular, the communication signals of Low Power Wide Area Networks (LPWAN), such as Sigfox, have been used for localization. In this rather recent field of study, few publicly available datasets, which would facilitate the consistent comparison of different positioning systems, exist so far. In the current study, a published dataset of RSSI measurements on a Sigfox network deployed in Antwerp, Belgium is used to analyse the appropriate selection of preprocessing steps and to tune the hyperparameters of a kNN fingerprinting method. Initially, the tuning of hyperparameter k for a variety of distance metrics, and the selection of efficient data transformation schemes proposed by relevant works, is presented. In addition, accuracy improvements are achieved in this study by a detailed examination of the appropriate adjustment of the parameters of the data transformation schemes tested, and of the handling of out-of-range values. With the appropriate tuning of these factors, the achieved mean localization error was 298 meters, and the median error was 109 meters. To facilitate the reproducibility of tests and the comparability of results, the code and the train/validation/test split used in this study are available.}, note = {IPIN 2019, Preprint available: https://arxiv.org/abs/1908.06851}, keywords = {}, pubstate = {published}, tppubtype = {conference} }
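The two tuning knobs in the abstract, the RSSI transformation (with out-of-range handling) and the kNN hyperparameters, look roughly like the sketch below. The transform is one generic exponential/powed-style rescaling with assumed constants, not necessarily the scheme or grid used in the paper.

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV

def powed_transform(rssi, floor=-150.0, beta=np.e):
    """Rescale RSSI into [0, 1], clipping out-of-range values to a floor first
    and emphasizing strong signals. Constants are illustrative assumptions."""
    r = np.clip(rssi, floor, 0.0)
    pos = r - floor                          # shift so the weakest signal maps to 0
    return (pos / -floor) ** beta

rng = np.random.default_rng(0)
X = powed_transform(rng.uniform(-150, -60, (1000, 10)))
Y = rng.uniform(0, 1, (1000, 2))
grid = GridSearchCV(KNeighborsRegressor(),
                    {"n_neighbors": [5, 10, 15],
                     "metric": ["euclidean", "manhattan"]},
                    cv=3).fit(X, Y)
print(grid.best_params_)
```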
Ramapuram, Jason; Webb, Russ Differentiable Approximation Bridges For Training Networks Containing Non-Differentiable Functions Conference IEEE WCCI 2020, IEEE 2019, (Code: http://github.com/apple/ml-dab). @conference{Ramapuram2019, title = {Differentiable Approximation Bridges For Training Networks Containing Non-Differentiable Functions}, author = {Jason Ramapuram and Russ Webb}, url = {https://arxiv.org/abs/1905.03658 http://github.com/apple/ml-dab}, year = {2019}, date = {2019-07-19}, booktitle = {IEEE WCCI 2020}, organization = {IEEE}, abstract = {Modern neural network training relies on piece-wise (sub-)differentiable functions in order to use backpropagation for efficient calculation of gradients. In this work, we introduce a novel method to allow for non-differentiable functions at intermediary layers of deep neural networks. We do so through the introduction of a differentiable approximation bridge (DAB) neural network which provides smooth approximations to the gradient of the non-differentiable function. We present strong empirical results (performing over 600 experiments) in three different domains: unsupervised (image) representation learning, image classification, and sequence sorting, to demonstrate that our proposed method improves state-of-the-art performance. We demonstrate that utilizing non-differentiable functions in unsupervised (image) representation learning improves reconstruction quality and posterior linear separability by 10%. We also observe an accuracy improvement of 77% in neural sequence sorting and a 25% improvement against the straight-through estimator [3] in an image classification setting with the sort non-linearity. This work enables the usage of functions that were previously not usable in neural networks.}, note = {Code: http://github.com/apple/ml-dab}, keywords = {}, pubstate = {published}, tppubtype = {conference} }
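One way to wire a DAB into a network is the familiar `hard + (soft - soft.detach())` pattern: the forward pass keeps the exact non-differentiable values while gradients flow through a small learned approximator, which is itself fit to the hard op with an auxiliary loss. The sketch below uses element-wise rounding (a zero-gradient op) as the wrapped function; it illustrates the idea, not the released ml-dab implementation linked above.

```python
import torch
import torch.nn as nn

class DABRound(nn.Module):
    """Non-differentiable op in the forward pass, learned smooth gradient path."""
    def __init__(self, dim=16):
        super().__init__()
        self.dab = nn.Sequential(nn.Linear(dim, 64), nn.ReLU(), nn.Linear(64, dim))
    def forward(self, x):
        hard = torch.round(x)                       # non-differentiable (zero gradient)
        soft = self.dab(x)                          # smooth learned approximation
        y = hard.detach() + soft - soft.detach()    # forward: hard values; backward: DAB grads
        aux = ((soft - hard.detach()) ** 2).mean()  # keeps the DAB close to the hard op
        return y, aux

x = torch.randn(4, 16, requires_grad=True)
y, aux = DABRound()(x)
(y.sum() + aux).backward()                          # gradients reach x and the DAB net
```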
Blondé, Lionel; Kalousis, Alexandros Sample-Efficient Imitation Learning via Generative Adversarial Nets Inproceedings The 22nd International Conference on Artificial Intelligence and Statistics, AISTATS 2019, 16-18 April 2019, Naha, Okinawa, Japan, pp. 3138–3148, 2019. @inproceedings{DBLP:conf/aistats/BlondeK19, title = {Sample-Efficient Imitation Learning via Generative Adversarial Nets}, author = {Lionel Blondé and Alexandros Kalousis}, url = {http://proceedings.mlr.press/v89/blonde19a.html}, year = {2019}, date = {2019-01-01}, booktitle = {The 22nd International Conference on Artificial Intelligence and Statistics, AISTATS 2019, 16-18 April 2019, Naha, Okinawa, Japan}, pages = {3138--3148}, crossref = {DBLP:conf/aistats/2019}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Boget, Yoann Adversarial Regression. Generative Adversarial Networks for Non-Linear Regression: Theory and Assessment Journal Article arXiv preprint arXiv:1910.09106, 2019. @article{boget2019adversarial, title = {Adversarial Regression. Generative Adversarial Networks for Non-Linear Regression: Theory and Assessment}, author = {Yoann Boget}, url = {https://arxiv.org/abs/1910.09106}, year = {2019}, date = {2019-01-01}, journal = {arXiv preprint arXiv:1910.09106}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2018 |
Ramapuram, Jason; Lavda, Frantzeska; Webb, Russ; Kalousis, Alexandros; Diephuis, Maurits Variational Saccading: Efficient Inference for Large Resolution Images Conference BMVC 2019 & Bayesian Deep Learning Workshop Neurips, 2018, (Code: https://github.com/jramapuram/variational_saccading). @conference{Ramapuram2018, title = {Variational Saccading: Efficient Inference for Large Resolution Images}, author = {Jason Ramapuram and Frantzeska Lavda and Russ Webb and Alexandros Kalousis and Maurits Diephuis}, url = {https://arxiv.org/abs/1812.03170 https://github.com/jramapuram/variational_saccading}, year = {2018}, date = {2018-12-03}, booktitle = {BMVC 2019 & Bayesian Deep Learning Workshop Neurips}, journal = {Bayesian Deep Learning Workshop NeurIPS 2018}, abstract = {Image classification with deep neural networks is typically restricted to images of small dimensionality such as 224x224 in Resnet models. This limitation excludes the 4000x3000 dimensional images that are taken by modern smartphone cameras and smart devices. In this work, we aim to mitigate the prohibitive inferential and memory costs of operating in such large dimensional spaces. To sample from the high-resolution original input distribution, we propose using a smaller proxy distribution to learn the co-ordinates that correspond to regions of interest in the high-dimensional space. We introduce a new principled variational lower bound that captures the relationship of the proxy distribution's posterior and the original image's co-ordinate space in a way that maximizes the conditional classification likelihood. We empirically demonstrate on one synthetic benchmark and one real world large resolution DSLR camera image dataset that our method produces comparable results with 10x faster inference and lower memory consumption than a model that utilizes the entire original input distribution.}, note = {Code: https://github.com/jramapuram/variational_saccading}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Image classification with deep neural networks is typically restricted to images of small dimensionality such as 224x224 in Resnet models. This limitation excludes the 4000x3000 dimensional images that are taken by modern smartphone cameras and smart devices. In this work, we aim to mitigate the prohibitive inferential and memory costs of operating in such large dimensional spaces. To sample from the high-resolution original input distribution, we propose using a smaller proxy distribution to learn the co-ordinates that correspond to regions of interest in the high-dimensional space. We introduce a new principled variational lower bound that captures the relationship of the proxy distribution's posterior and the original image's co-ordinate space in a way that maximizes the conditional classification likelihood. We empirically demonstrate on one synthetic benchmark and one real world large resolution DSLR camera image dataset that our method produces comparable results with 10x faster inference and lower memory consumption than a model that utilizes the entire original input distribution. |
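The mechanism, sampling where to look from a posterior conditioned on a cheap low-resolution proxy and then processing only the sampled crop of the full-resolution image, can be sketched as follows; the encoder, the sizes, and the names are illustrative assumptions rather than the paper's architecture.

import torch
import torch.nn as nn

def take_glimpse(image, center, size=32):
    # Crop a size x size window from a (C, H, W) image; `center` holds
    # normalized coordinates in [-1, 1]^2.
    _, H, W = image.shape
    cy = int((center[0].item() + 1) / 2 * (H - size))
    cx = int((center[1].item() + 1) / 2 * (W - size))
    return image[:, cy:cy + size, cx:cx + size]

# Hypothetical proxy encoder mapping the downsampled image to a Gaussian
# over glimpse coordinates (two means, two log-variances).
proxy_encoder = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 4))

full_res = torch.rand(3, 3000, 4000)  # e.g. a smartphone photo
proxy = nn.functional.interpolate(full_res[None], size=(32, 32))[0]

stats = proxy_encoder(proxy[None])
mu, log_var = stats[:, :2], stats[:, 2:]
eps = torch.randn_like(mu)
center = torch.tanh(mu + eps * (0.5 * log_var).exp())  # reparameterized sample
glimpse = take_glimpse(full_res, center[0])  # only this crop is classified downstream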
Gregorova, Magda Sparse learning for variable selection with structures and nonlinearities PhD Thesis 2018, (PhD Thesis ID: unige:115678). @phdthesis{, title = {Sparse learning for variable selection with structures and nonlinearities}, author = {Magda Gregorova}, url = {https://archive-ouverte.unige.ch/unige:115678}, year = {2018}, date = {2018-11-01}, abstract = {In this thesis we discuss machine learning methods performing automated variable selection for learning sparse predictive models. There are multiple reasons for promoting sparsity in the predictive models. By relying on a limited set of input variables the models naturally counteract the overfitting problem ubiquitous in learning from finite sets of training points. Sparse models are cheaper to use for predictions, they usually require lower computational resources and by relying on smaller sets of inputs can possibly reduce costs for data collection and storage. Sparse models can also contribute to better understanding of the investigated phenomena as they are easier to interpret than full models.}, type = {PhD Thesis}, note = {PhD Thesis ID: unige:115678}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } In this thesis we discuss machine learning methods performing automated variable selection for learning sparse predictive models. There are multiple reasons for promoting sparsity in the predictive models. By relying on a limited set of input variables the models naturally counteract the overfitting problem ubiquitous in learning from finite sets of training points. Sparse models are cheaper to use for predictions, they usually require lower computational resources and by relying on smaller sets of inputs can possibly reduce costs for data collection and storage. Sparse models can also contribute to better understanding of the investigated phenomena as they are easier to interpret than full models. |
Lavda, Frantzeska; Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros Continual Classification Learning Using Generative Models Workshop Continual learning Workshop NeurIPS 2018, 2018. @workshop{DBLP:journals/corr/abs-1810-10612, title = {Continual Classification Learning Using Generative Models}, author = {Frantzeska Lavda and Jason Ramapuram and Magda Gregorova and Alexandros Kalousis}, url = {http://arxiv.org/abs/1810.10612}, year = {2018}, date = {2018-10-24}, booktitle = {Continual learning Workshop NeurIPS 2018}, journal = {CoRR}, keywords = {}, pubstate = {published}, tppubtype = {workshop} } |
Ramapuram, Jason; Webb, Russ A New Benchmark and Progress Toward Improved Weakly Supervised Learning Journal Article BMVC 2018, abs/1807.00126 , 2018, (Code: https://github.com/apple/ml-all-pairs). @article{DBLP:journals/corr/abs-1807-00126, title = {A New Benchmark and Progress Toward Improved Weakly Supervised Learning}, author = {Jason Ramapuram and Russ Webb}, url = {http://arxiv.org/abs/1807.00126 https://github.com/apple/ml-all-pairs}, year = {2018}, date = {2018-01-01}, journal = {BMVC 2018}, volume = {abs/1807.00126}, note = {Code: https://github.com/apple/ml-all-pairs}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Blondé, Lionel; Kalousis, Alexandros Sample-Efficient Imitation Learning via Generative Adversarial Nets Conference abs/1809.02064 , 2018. @conference{DBLP:journals/corr/abs-1809-02064b, title = {Sample-Efficient Imitation Learning via Generative Adversarial Nets}, author = {Lionel Blondé and Alexandros Kalousis}, url = {http://arxiv.org/abs/1809.02064 http://proceedings.mlr.press/v89/blonde19a/blonde19a.pdf https://github.com/lionelblonde/sam-pytorch https://github.com/lionelblonde/sam-tf}, year = {2018}, date = {2018-01-01}, journal = {CoRR}, volume = {abs/1809.02064}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Gregorová, Magda; Kalousis, Alexandros; Marchand-Maillet, Stéphane Structured nonlinear variable selection Inproceedings Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, UAI 2018, Monterey, California, USA, August 6-10, 2018, pp. 23–32, 2018. @inproceedings{DBLP:conf/uai/GregorovaKM18, title = {Structured nonlinear variable selection}, author = {Magda Gregorová and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {http://auai.org/uai2018/proceedings/papers/17.pdf}, year = {2018}, date = {2018-01-01}, booktitle = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, UAI 2018, Monterey, California, USA, August 6-10, 2018}, pages = {23--32}, crossref = {DBLP:conf/uai/2018}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Gregorová, Magda; Ramapuram, Jason; Kalousis, Alexandros; Marchand-Maillet, Stéphane Large-Scale Nonlinear Variable Selection via Kernel Random Features Inproceedings Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2018, Dublin, Ireland, September 10-14, 2018, Proceedings, Part II, pp. 177–192, 2018. @inproceedings{DBLP:conf/pkdd/GregorovaRKM18, title = {Large-Scale Nonlinear Variable Selection via Kernel Random Features}, author = {Magda Gregorová and Jason Ramapuram and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {https://doi.org/10.1007/978-3-030-10928-8_11}, doi = {10.1007/978-3-030-10928-8_11}, year = {2018}, date = {2018-01-01}, booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2018, Dublin, Ireland, September 10-14, 2018, Proceedings, Part II}, pages = {177--192}, crossref = {DBLP:conf/pkdd/2018-2}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2017 |
Gregorová, Magda; Kalousis, Alexandros; Marchand-Maillet, Stéphane Learning Predictive Leading Indicators for Forecasting Time Series Systems with Unknown Clusters of Forecast Tasks Inproceedings Proceedings of The 9th Asian Conference on Machine Learning, ACML 2017, Seoul, Korea, November 15-17, 2017., pp. 161–176, 2017. @inproceedings{DBLP:conf/acml/GregorovaKM17, title = {Learning Predictive Leading Indicators for Forecasting Time Series Systems with Unknown Clusters of Forecast Tasks}, author = {Magda Gregorová and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {http://proceedings.mlr.press/v77/gregorova17a/gregorova17a.pdf http://proceedings.mlr.press/v77/gregorova17a/gregorova17a-supp.pdf https://bitbucket.org/dmmlgeneva/var-leading-indicators}, year = {2017}, date = {2017-01-01}, booktitle = {Proceedings of The 9th Asian Conference on Machine Learning, ACML 2017, Seoul, Korea, November 15-17, 2017.}, pages = {161--176}, crossref = {DBLP:conf/acml/2017}, abstract = {We present a new method for forecasting systems of multiple interrelated time series. The method learns the forecast models together with discovering leading indicators from within the system that serve as good predictors improving the forecast accuracy and a cluster structure of the predictive tasks around these. The method is based on the classical linear vector autoregressive model (VAR) and links the discovery of the leading indicators to inferring sparse graphs of Granger causality. We formulate a new constrained optimisation problem to promote the desired sparse structures across the models and the sharing of information amongst the learning tasks in a multi-task manner. We propose an algorithm for solving the problem and document on a battery of synthetic and real-data experiments the advantages of our new method over baseline VAR models as well as the state-of-the-art sparse VAR learning methods. }, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We present a new method for forecasting systems of multiple interrelated time series. The method learns the forecast models together with discovering leading indicators from within the system that serve as good predictors improving the forecast accuracy and a cluster structure of the predictive tasks around these. The method is based on the classical linear vector autoregressive model (VAR) and links the discovery of the leading indicators to inferring sparse graphs of Granger causality. We formulate a new constrained optimisation problem to promote the desired sparse structures across the models and the sharing of information amongst the learning tasks in a multi-task manner. We propose an algorithm for solving the problem and document on a battery of synthetic and real-data experiments the advantages of our new method over baseline VAR models as well as the state-of-the-art sparse VAR learning methods. |
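The computational core of such sparsity, zeroing out as a group every lag coefficient of a series that is not a useful predictor so that the surviving series act as leading indicators, can be sketched with a proximal-gradient group-lasso solver. This is a simplified single-section sketch with assumed shapes, not the authors' multi-task formulation with task clustering.

import numpy as np

def prox_indicator_groups(W, lam, n, p):
    # W: (n*p, n) stacked VAR coefficients; row block l holds the lag-(l+1)
    # coefficients. All coefficients attached to one candidate series j
    # (every lag, every target equation) are shrunk jointly: a zeroed
    # group means series j is not a leading indicator.
    for j in range(n):
        rows = [j + l * n for l in range(p)]
        norm = np.linalg.norm(W[rows])
        W[rows] *= max(0.0, 1.0 - lam / (norm + 1e-12))
    return W

def fit_sparse_var(X, p=2, lam=0.05, lr=1e-2, iters=3000):
    # X: (T, n) multivariate time series; least squares plus group lasso.
    T, n = X.shape
    Z = np.hstack([X[p - 1 - l:T - 1 - l] for l in range(p)])  # lagged design
    Y = X[p:]
    W = np.zeros((n * p, n))
    for _ in range(iters):
        grad = Z.T @ (Z @ W - Y) / (T - p)
        W = prox_indicator_groups(W - lr * grad, lr * lam, n, p)
    return W  # the nonzero row groups identify the leading indicators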
Mollaysa, Amina; Strasser, Pablo; Kalousis, Alexandros Regularising Non-linear Models Using Feature Side-information Inproceedings Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017, pp. 2508–2517, 2017. @inproceedings{DBLP:conf/icml/MollaysaSK17, title = {Regularising Non-linear Models Using Feature Side-information}, author = {Amina Mollaysa and Pablo Strasser and Alexandros Kalousis}, url = {http://proceedings.mlr.press/v70/mollaysa17a/mollaysa17a.pdf http://proceedings.mlr.press/v70/mollaysa17a/mollaysa17a-supp.pdf https://bitbucket.org/dmmlgeneva/code_icml2017_regularising-non-linear-models-using-feature}, year = {2017}, date = {2017-01-01}, booktitle = {Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017}, pages = {2508--2517}, crossref = {DBLP:conf/icml/2017}, abstract = {Very often features come with their own vectorial descriptions which provide detailed information about their properties. We refer to these vectorial descriptions as feature side-information. In the standard learning scenario, input is represented as a vector of features and the feature side-information is most often ignored or used only for feature selection prior to model fitting. We believe that feature side-information, which carries information about the features’ intrinsic properties, can help improve model predictions if used in a proper way during the learning process. In this paper, we propose a framework that allows for the incorporation of the feature side-information during the learning of very general model families to improve the prediction performance. We control the structures of the learned models so that they reflect features’ similarities as these are defined on the basis of the side-information. We perform experiments on a number of benchmark datasets which show significant predictive performance gains, over a number of baselines, as a result of the exploitation of the side-information.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Very often features come with their own vectorial descriptions which provide detailed information about their properties. We refer to these vectorial descriptions as feature side-information. In the standard learning scenario, input is represented as a vector of features and the feature side-information is most often ignored or used only for feature selection prior to model fitting. We believe that feature side-information, which carries information about the features’ intrinsic properties, can help improve model predictions if used in a proper way during the learning process. In this paper, we propose a framework that allows for the incorporation of the feature side-information during the learning of very general model families to improve the prediction performance. We control the structures of the learned models so that they reflect features’ similarities as these are defined on the basis of the side-information. We perform experiments on a number of benchmark datasets which show significant predictive performance gains, over a number of baselines, as a result of the exploitation of the side-information. |
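One concrete way to make a model's structure reflect feature similarities, in the spirit of the abstract above, is to penalize changes of the model output when the values of two features that the side-information deems similar are swapped. The sketch below is an illustrative reading of that idea, not the exact ICML 2017 regularizer; model, S (a feature-by-feature similarity matrix built from the side-information), and n_pairs are assumed inputs.

import torch

def side_info_regularizer(model, x, S, n_pairs=32):
    # Penalize changes of model(x) under swaps of feature pairs, weighted
    # by how similar the side-information says the two features are.
    d = x.shape[1]
    pairs = torch.randint(0, d, (n_pairs, 2))
    base = model(x)
    penalty = x.new_zeros(())
    for i, j in pairs.tolist():
        x_swapped = x.clone()
        x_swapped[:, [i, j]] = x[:, [j, i]]
        penalty = penalty + S[i, j] * ((model(x_swapped) - base) ** 2).mean()
    return penalty / n_pairs

# Training would then minimize task_loss + lam * side_info_regularizer(model, x, S).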
Gregorová, Magda; Kalousis, Alexandros; Marchand-Maillet, Stéphane Forecasting and Granger Modelling with Non-linear Dynamical Dependencies Inproceedings Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2017, Skopje, Macedonia, September 18-22, 2017, Proceedings, Part II, pp. 544–558, 2017. @inproceedings{DBLP:conf/pkdd/GregorovaKM17, title = {Forecasting and Granger Modelling with Non-linear Dynamical Dependencies}, author = {Magda Gregorová and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {https://hesso.tind.io/record/2097/files/Gregorova_Kalousis_2017_forecasting_and_granger.pdf https://bitbucket.org/dmmlgeneva/nonlinear-granger}, doi = {10.1007/978-3-319-71246-8_33}, year = {2017}, date = {2017-01-01}, booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2017, Skopje, Macedonia, September 18-22, 2017, Proceedings, Part II}, pages = {544--558}, crossref = {DBLP:conf/pkdd/2017-2}, abstract = {Traditional linear methods for forecasting multivariate time series are not able to satisfactorily model the non-linear dependencies that may exist in non-Gaussian series. We build on the theory of learning vector-valued functions in the reproducing kernel Hilbert space and develop a method for learning prediction functions that accommodate such non-linearities. The method not only learns the predictive function but also the matrix-valued kernel underlying the function search space directly from the data. Our approach is based on learning multiple matrix-valued kernels, each of those composed of a set of input kernels and a set of output kernels learned in the cone of positive semi-definite matrices. In addition to superior predictive performance in the presence of strong non-linearities, our method also recovers the hidden dynamic relationships between the series and thus is a new alternative to existing graphical Granger techniques. }, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Traditional linear methods for forecasting multivariate time series are not able to satisfactorily model the non-linear dependencies that may exist in non-Gaussian series. We build on the theory of learning vector-valued functions in the reproducing kernel Hilbert space and develop a method for learning prediction functions that accommodate such non-linearities. The method not only learns the predictive function but also the matrix-valued kernel underlying the function search space directly from the data. Our approach is based on learning multiple matrix-valued kernels, each of those composed of a set of input kernels and a set of output kernels learned in the cone of positive semi-definite matrices. In addition to superior predictive performance in the presence of strong non-linearities, our method also recovers the hidden dynamic relationships between the series and thus is a new alternative to existing graphical Granger techniques. |
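As background for the matrix-valued kernels used above: with a fixed separable kernel K(x, x') = k(x, x')B, vector-valued kernel ridge regression already has a closed form, sketched below with an assumed Gaussian input kernel. This is a textbook baseline, not the paper's method, which additionally learns the input and output kernels themselves and reads the Granger structure off the learned model.

import numpy as np

def ovk_ridge(X, Y, B, gamma=1.0, lam=1e-2):
    # Vector-valued kernel ridge regression with a separable operator-valued
    # kernel K(x, x') = k(x, x') * B, where B (m x m, symmetric PSD) encodes
    # relations between the m output series.
    n, m = Y.shape
    sq = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
    K = np.exp(-gamma * sq)  # scalar Gaussian input kernel
    G = np.kron(K, B)        # block Gram matrix, block (i, j) = k(x_i, x_j) B
    C = np.linalg.solve(G + lam * np.eye(n * m), Y.reshape(-1)).reshape(n, m)

    def predict(X_new):
        sq_new = ((X_new[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        return np.exp(-gamma * sq_new) @ C @ B  # f(x) = sum_i k(x, x_i) B c_i
    return predict

f = ovk_ridge(np.random.randn(50, 3), np.random.randn(50, 2), B=np.eye(2) + 0.5)
print(f(np.random.randn(4, 3)).shape)  # (4, 2)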
Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros Lifelong Generative Modeling Journal Article Elsevier Neurocomputing 2020, abs/1705.09847 , 2017. @article{DBLP:journals/corr/RamapuramGK17, title = {Lifelong Generative Modeling}, author = {Jason Ramapuram and Magda Gregorova and Alexandros Kalousis}, url = {http://arxiv.org/abs/1705.09847}, year = {2017}, date = {2017-01-01}, journal = {Elsevier Neurocomputing 2020}, volume = {abs/1705.09847}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2016 |
Nguyen, Phong; Wang, Jun; Kalousis, Alexandros Factorizing LambdaMART for cold start recommendations Journal Article Machine Learning, 104 (2-3), pp. 223–242, 2016. @article{DBLP:journals/ml/NguyenWK16, title = {Factorizing LambdaMART for cold start recommendations}, author = {Phong Nguyen and Jun Wang and Alexandros Kalousis}, url = {https://hesso.tind.io/record/1651/files/Kalousis_2016_factorizing_lambdamart.pdf https://github.com/nguyenfunk/lambdamart-mf }, doi = {10.1007/s10994-016-5579-3}, year = {2016}, date = {2016-01-01}, journal = {Machine Learning}, volume = {104}, number = {2-3}, pages = {223--242}, abstract = {Recommendation systems often rely on point-wise loss metrics such as the mean squared error. However, in real recommendation settings only a few items are presented to a user. This observation has recently encouraged the use of rank-based metrics. LambdaMART is the state-of-the-art algorithm in learning to rank which relies on such a metric. Motivated by the fact that very often the users’ and items’ descriptions as well as the preference behavior can be well summarized by a small number of hidden factors, we propose a novel algorithm, LambdaMART matrix factorization (LambdaMART-MF), that learns latent representations of users and items using gradient boosted trees. The algorithm factorizes LambdaMART by defining relevance scores as the inner product of the learned representations of the users and items. We regularise the learned latent representations so that they reflect the user and item manifolds as these are defined by their original feature-based descriptors and the preference behavior. We also propose to use a weighted variant of NDCG to reduce the penalty for similar items with large rating discrepancy. We experiment on two very different recommendation datasets, meta-mining and movies-users, and evaluate the performance of LambdaMART-MF, with and without regularization, in the cold start setting as well as in the simpler matrix completion setting. The experiments show that the factorization of LambdaMART brings significant performance improvements both in the cold start and the matrix completion settings. The incorporation of regularisation seems to have a smaller performance impact.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Recommendation systems often rely on point-wise loss metrics such as the mean squared error. However, in real recommendation settings only a few items are presented to a user. This observation has recently encouraged the use of rank-based metrics. LambdaMART is the state-of-the-art algorithm in learning to rank which relies on such a metric. Motivated by the fact that very often the users’ and items’ descriptions as well as the preference behavior can be well summarized by a small number of hidden factors, we propose a novel algorithm, LambdaMART matrix factorization (LambdaMART-MF), that learns latent representations of users and items using gradient boosted trees. The algorithm factorizes LambdaMART by defining relevance scores as the inner product of the learned representations of the users and items. We regularise the learned latent representations so that they reflect the user and item manifolds as these are defined by their original feature-based descriptors and the preference behavior. We also propose to use a weighted variant of NDCG to reduce the penalty for similar items with large rating discrepancy. We experiment on two very different recommendation datasets, meta-mining and movies-users, and evaluate the performance of LambdaMART-MF, with and without regularization, in the cold start setting as well as in the simpler matrix completion setting. The experiments show that the factorization of LambdaMART brings significant performance improvements both in the cold start and the matrix completion settings. The incorporation of regularisation seems to have a smaller performance impact. |
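The scoring rule at the heart of LambdaMART-MF is an inner product of latent representations. The sketch below shows only that final step; the random matrices are placeholders for the latent factors that, in the paper, gradient boosted trees produce from the user and item descriptors, which is exactly what allows scoring a cold-start user or item.

import numpy as np

rng = np.random.default_rng(0)
U = rng.normal(size=(100, 8))  # user latent representations (placeholders)
V = rng.normal(size=(500, 8))  # item latent representations (placeholders)

def recommend(u, V, top_n=10):
    scores = V @ u               # relevance score = inner product of latents
    return np.argsort(-scores)[:top_n]

print(recommend(U[0], V))        # indices of the ten highest-scoring items

Because the latents are functions of the descriptors rather than free parameters, a brand-new user or item with known features still gets a representation and hence a score.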
2015 |
Nguyen, Phong Meta-mining: a meta-learning framework to support the recommendation, planning and optimization of data mining workflows PhD Thesis University of Geneva, Computer Science Department, 2015, (ID: unige:86131). @phdthesis{PhongPhD2015, title = {Meta-mining: a meta-learning framework to support the recommendation, planning and optimization of data mining workflows}, author = {Phong Nguyen}, url = {http://nbn-resolving.de/urn:nbn:ch:unige-861312}, year = {2015}, date = {2015-09-01}, school = {University of Geneva, Computer Science Department}, abstract = {Data mining (DM) can be an extremely complex process in which the data analyst has to assemble into a DM workflow a number of data preprocessing and data mining operators in order to address her/his mining task. In order to support the DM user in the design of her/his DM process analysis, we propose a novel framework, aptly called meta-mining or process-oriented meta-learning, which significantly extends previous state-of-the-art meta-learning and DM workflow planning approaches. We analyze the whole DM process in order to build meta-models which account for both the specificities of datasets and workflows, the pairs of which are related by some performance metric such as classification accuracy. We build the meta-mining models by combining in an innovative manner a number of different technologies and by devising new algorithms; we combine mining over complex structures, workflows, in the presence of domain knowledge, and we derive planning techniques as well as new metric-learning and learning-to-rank algorithms which are defined over heterogeneous spaces. The planning system we propose is the first one to our knowledge which is able to design DM workflows whose expected performance is maximum for a given mining problem. Finally, our framework can also be applied to any recommendation problem with side information, i.e. descriptors on users and items, to solve the cold start problem.}, note = {ID: unige:86131}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Data mining (DM) can be an extremely complex process in which the data analyst has to assemble into a DM workflow a number of data preprocessing and data mining operators in order to address her/his mining task. In order to support the DM user in the design of her/his DM process analysis, we propose a novel framework, aptly called meta-mining or process-oriented meta-learning, which significantly extends previous state-of-the-art meta-learning and DM workflow planning approaches. We analyze the whole DM process in order to build meta-models which account for both the specificities of datasets and workflows, the pairs of which are related by some performance metric such as classification accuracy. We build the meta-mining models by combining in an innovative manner a number of different technologies and by devising new algorithms; we combine mining over complex structures, workflows, in the presence of domain knowledge, and we derive planning techniques as well as new metric-learning and learning-to-rank algorithms which are defined over heterogeneous spaces. The planning system we propose is the first one to our knowledge which is able to design DM workflows whose expected performance is maximum for a given mining problem. Finally, our framework can also be applied to any recommendation problem with side information, i.e. descriptors on users and items, to solve the cold start problem. |
Wang, Jun Improved Distance Metric Learning for Nearest Neighbor Classification PhD Thesis University of Geneva, Computer Science Department, 2015, (ID: unige:76989). @phdthesis{WangPhd2015, title = {Improved Distance Metric Learning for Nearest Neighbor Classification}, author = {Jun Wang}, url = {http://nbn-resolving.de/urn:nbn:ch:unige-769894 https://archive-ouverte.unige.ch/unige:76989/ATTACHMENT01}, year = {2015}, date = {2015-07-01}, school = {University of Geneva, Computer Science Department}, abstract = {Machine learning is about learning predictive models from data. To generalize well, many machine learning algorithms assume that the predictions of nearby data points are similar. Therefore, how to measure the closeness of data points becomes crucial in these learning algorithms. Metric learning addresses this challenge by learning a data-dependent metric. Over the last ten years it has attracted a lot of attention and has become one of the main approaches to the distance/similarity learning problem. The most studied approach is single metric learning, which learns one distance metric for the whole input data space. This approach is often computationally efficient. However, it might not be flexible enough to model the distance well in different data neighborhoods. This motivated local metric learning and nonlinear metric learning. In the local metric learning approach, the algorithm learns one distance metric per neighborhood, so that distances in different neighborhoods can be well fitted. Nonlinear metric learning follows a different approach to increase the model complexity: it first maps data points into a new data space through a nonlinear feature mapping and then learns a single metric in the new data space. Despite the success of various metric learning algorithms, each algorithm comes with its own set of limitations. In this thesis we focus on their limitations and develop novel metric learning algorithms. We focus on metric learning for Nearest Neighbor (NN) classification; NN is one of the most well-known algorithms depending strongly on the distance metric. Four metric learning algorithms are contributed: one single metric learning algorithm, one local metric learning algorithm and two nonlinear metric learning algorithms. The developed metric learning algorithms improve over the state of the art in terms of predictive performance and algorithmic scalability.}, note = {ID: unige:76989}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } Machine learning is about learning predictive models from data. To generalize well, many machine learning algorithms assume that the predictions of nearby data points are similar. Therefore, how to measure the closeness of data points becomes crucial in these learning algorithms. Metric learning addresses this challenge by learning a data-dependent metric. Over the last ten years it has attracted a lot of attention and has become one of the main approaches to the distance/similarity learning problem. The most studied approach is single metric learning, which learns one distance metric for the whole input data space. This approach is often computationally efficient. However, it might not be flexible enough to model the distance well in different data neighborhoods. This motivated local metric learning and nonlinear metric learning. In the local metric learning approach, the algorithm learns one distance metric per neighborhood, so that distances in different neighborhoods can be well fitted. Nonlinear metric learning follows a different approach to increase the model complexity: it first maps data points into a new data space through a nonlinear feature mapping and then learns a single metric in the new data space. Despite the success of various metric learning algorithms, each algorithm comes with its own set of limitations. In this thesis we focus on their limitations and develop novel metric learning algorithms. We focus on metric learning for Nearest Neighbor (NN) classification; NN is one of the most well-known algorithms depending strongly on the distance metric. Four metric learning algorithms are contributed: one single metric learning algorithm, one local metric learning algorithm and two nonlinear metric learning algorithms. The developed metric learning algorithms improve over the state of the art in terms of predictive performance and algorithmic scalability. |
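The object that all four contributed algorithms manipulate is a Mahalanobis-type metric. As a worked reminder of what is being learned, here is the standard parameterization with a PSD matrix M = L^T L, equivalent to a Euclidean distance after the linear map L (a textbook sketch, not code from the thesis).

import numpy as np

def mahalanobis(x, y, L):
    # d_M(x, y) = sqrt((x - y)^T M (x - y)) with M = L^T L, i.e. the
    # Euclidean distance after mapping points through L.
    diff = L @ (x - y)
    return np.sqrt(diff @ diff)

L = np.array([[2.0, 0.0], [0.0, 0.5]])  # a learned map would come from training
print(mahalanobis(np.array([1.0, 1.0]), np.array([0.0, 0.0]), L))

Metric learning algorithms differ mainly in how they fit L (or M, globally, per neighborhood, or after a nonlinear mapping) so that same-class neighbors end up close and different-class points far under this distance.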
Keet, Maria C; Lawrynowicz, Agnieszka; d'Amato, Claudia; Kalousis, Alexandros; Nguyen, Phong; Palma, Raul; Stevens, Robert; Hilario, Melanie The Data Mining OPtimization Ontology Journal Article J. Web Sem., 32 , pp. 43–53, 2015. @article{DBLP:journals/ws/KeetLdKNPSH15, title = {The Data Mining OPtimization Ontology}, author = {Maria C Keet and Agnieszka Lawrynowicz and Claudia d'Amato and Alexandros Kalousis and Phong Nguyen and Raul Palma and Robert Stevens and Melanie Hilario}, url = {http://www.dmo-foundry.org/DMOP}, doi = {10.1016/j.websem.2015.01.001}, year = {2015}, date = {2015-01-01}, journal = {J. Web Sem.}, volume = {32}, pages = {43--53}, abstract = {The Data Mining OPtimization Ontology (DMOP) has been developed to support informed decision-making at various choice points of the data mining process. The ontology can be used by data miners and deployed in ontology-driven information systems. The primary purpose for which DMOP has been developed is the automation of algorithm and model selection through semantic meta-mining that makes use of an ontology-based meta-analysis of complete data mining processes in view of extracting patterns associated with mining performance. To this end, DMOP contains detailed descriptions of data mining tasks (e.g., learning, feature selection), data, algorithms, hypotheses such as mined models or patterns, and workflows. A development methodology was used for DMOP, including items such as competency questions and foundational ontology reuse. Several non-trivial modeling problems were encountered and due to the complexity of the data mining details, the ontology requires the use of the OWL 2 DL profile. DMOP was successfully evaluated for semantic meta-mining and used in constructing the Intelligent Discovery Assistant, deployed at the popular data mining environment RapidMiner.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The Data Mining OPtimization Ontology (DMOP) has been developed to support informed decision-making at various choice points of the data mining process. The ontology can be used by data miners and deployed in ontology-driven information systems. The primary purpose for which DMOP has been developed is the automation of algorithm and model selection through semantic meta-mining that makes use of an ontology-based meta-analysis of complete data mining processes in view of extracting patterns associated with mining performance. To this end, DMOP contains detailed descriptions of data mining tasks (e.g., learning, feature selection), data, algorithms, hypotheses such as mined models or patterns, and workflows. A development methodology was used for DMOP, including items such as competency questions and foundational ontology reuse. Several non-trivial modeling problems were encountered and due to the complexity of the data mining details, the ontology requires the use of the OWL 2 DL profile. DMOP was successfully evaluated for semantic meta-mining and used in constructing the Intelligent Discovery Assistant, deployed at the popular data mining environment RapidMiner. |
Sun, Ke; Wang, Jun; Kalousis, Alexandros; Marchand-Maillet, Stéphane Information Geometry and Minimum Description Length Networks Inproceedings Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6-11 July 2015, pp. 49–58, 2015. @inproceedings{DBLP:conf/icml/SunWKM15, title = {Information Geometry and Minimum Description Length Networks}, author = {Ke Sun and Jun Wang and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {http://jmlr.org/proceedings/papers/v37/suna15.html}, year = {2015}, date = {2015-01-01}, booktitle = {Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6-11 July 2015}, pages = {49--58}, crossref = {DBLP:conf/icml/2015}, abstract = {We study parametric unsupervised mixture learning. We measure the loss of intrinsic information from the observations to complex mixture models, and then to simple mixture models. We present a geometric picture, where all these representations are regarded as free points in the space of probability distributions. Based on minimum description length, we derive a simple geometric principle to learn all these models together. We present a new learning machine with theories, algorithms, and simulations.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We study parametric unsupervised mixture learning. We measure the loss of intrinsic information from the observations to complex mixture models, and then to simple mixture models. We present a geometric picture, where all these representations are regarded as free points in the space of probability distributions. Based on minimum description length, we derive a simple geometric principle to learn all these models together. We present a new learning machine with theories, algorithms, and simulations. |
Sun, Ke; Wang, Jun; Kalousis, Alexandros; Marchand-Maillet, Stéphane Space-Time Local Embeddings Inproceedings Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada, pp. 100–108, 2015. @inproceedings{DBLP:conf/nips/SunWKM15, title = {Space-Time Local Embeddings}, author = {Ke Sun and Jun Wang and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {http://papers.nips.cc/paper/5971-space-time-local-embeddings}, year = {2015}, date = {2015-01-01}, booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada}, pages = {100--108}, crossref = {DBLP:conf/nips/2015}, abstract = {Space-time is a profound concept in physics. This concept was shown to be useful for dimensionality reduction. We present basic definitions with interesting counter-intuitions. We give theoretical propositions to show that space-time is a more powerful representation than Euclidean space. We apply this concept to manifold learning for preserving local information. Empirical results on non-metric datasets show that more information can be preserved in space-time.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Space-time is a profound concept in physics. This concept was shown to be useful for dimensionality reduction. We present basic definitions with interesting counter-intuitions. We give theoretical propositions to show that space-time is a more powerful representation than Euclidean space. We apply this concept to manifold learning for preserving local information. Empirical results on non-metric datasets show that more information can be preserved in space-time. |
Gregorova, Magda; Kalousis, Alexandros; Dinuzzo, Francesco Functional learning of time-series models preserving Granger-causality structures Inproceedings Proceedings of the Time Series Workshop of the 29th Neural Information Processing Systems conference, NIPS-2015, 11th December 2015 2015. @inproceedings{gregorova2015functional, title = {Functional learning of time-series models preserving Granger-causality structures}, author = {Magda Gregorova and Alexandros Kalousis and Francesco Dinuzzo}, url = {http://hesso.tind.io/record/1650}, year = {2015}, date = {2015-01-01}, booktitle = {Proceedings of the Time Series Workshop of the 29th Neural Information Processing Systems conference, NIPS-2015}, number = {CONFERENCE}, organization = {11th December 2015}, abstract = {We develop a functional learning approach to modelling systems of time series which preserves the ability of standard linear time-series models (VARs) to uncover the Granger-causality links in between the series of the system while allowing for richer functional relationships. We propose a framework for learning multiple output-kernels associated with multiple input-kernels over a structured input space and outline an algorithm for simultaneous learning of the kernels with the model parameters with various forms of regularization including non-smooth sparsity inducing norms. We present results of synthetic experiments illustrating the benefits of the described approach.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We develop a functional learning approach to modelling systems of time series which preserves the ability of standard linear time-series models (VARs) to uncover the Granger-causality links in between the series of the system while allowing for richer functional relationships. We propose a framework for learning multiple output-kernels associated with multiple input-kernels over a structured input space and outline an algorithm for simultaneous learning of the kernels with the model parameters with various forms of regularization including non-smooth sparsity inducing norms. We present results of synthetic experiments illustrating the benefits of the described approach. |
Gregorova, Magda; Kalousis, Alexandros; Marchand-Maillet, Stéphane Learning coherent Granger-causality in panel vector autoregressive models Inproceedings Proceedings of the Demand Forecasting Workshop of the 32nd International Conference on Machine Learning, ICML 2015. @inproceedings{gregorova2015learning, title = {Learning coherent Granger-causality in panel vector autoregressive models}, author = {Magda Gregorova and Alexandros Kalousis and Stéphane Marchand-Maillet}, url = {http://hesso.tind.io/record/1077}, year = {2015}, date = {2015-01-01}, booktitle = {Proceedings of the Demand Forecasting Workshop of the 32nd International Conference on Machine Learning}, number = {CONFERENCE}, organization = {ICML}, abstract = {We consider the problem of forecasting multiple time series across multiple cross-sections based solely on the past observations of the series. We propose to use a panel vector autoregressive model to capture the inter-dependencies on the past values of the multiple series. We restrict the panel vector autoregressive model to exclude the cross-sectional relationships and propose a method to learn models with sparse Granger-causality structures coherent across the panel sections. The method extends the concepts of group variable selection and support union recovery into the panel setting by extending the group lasso penalty (Yuan & Lin, 2006) into a matrix output regression setting with a 3d-tensor of model parameters.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We consider the problem of forecasting multiple time series across multiple cross-sections based solely on the past observations of the series. We propose to use a panel vector autoregressive model to capture the inter-dependencies on the past values of the multiple series. We restrict the panel vector autoregressive model to exclude the cross-sectional relationships and propose a method to learn models with sparse Granger-causality structures coherent across the panel sections. The method extends the concepts of group variable selection and support union recovery into the panel setting by extending the group lasso penalty (Yuan & Lin, 2006) into a matrix output regression setting with a 3d-tensor of model parameters. |
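The coherence constraint can be made concrete: collect the per-section VAR coefficient matrices in a tensor and shrink each Granger link jointly across sections, so that the recovered causality graph is shared by the whole panel. Below is a minimal sketch of the corresponding proximal operator, with an assumed tensor layout.

import numpy as np

def prox_coherent_granger(W, lam):
    # W: (K, n, n) tensor of VAR(1) coefficient matrices for K panel
    # sections. Each link (i, j) is shrunk jointly across all sections,
    # so the zero pattern (the Granger-causality graph) is coherent
    # across the panel.
    norms = np.linalg.norm(W, axis=0, keepdims=True)  # (1, n, n)
    return W * np.maximum(0.0, 1.0 - lam / (norms + 1e-12))

# Used inside a proximal-gradient loop: W = prox_coherent_granger(W - lr * grad, lr * lam)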
2014 |
Nguyen, Phong; Hilario, Melanie; Kalousis, Alexandros Using Meta-mining to Support Data Mining Workflow Planning and Optimization Journal Article J. Artif. Intell. Res., 51 , pp. 605–644, 2014. @article{DBLP:journals/jair/NguyenHK14, title = {Using Meta-mining to Support Data Mining Workflow Planning and Optimization}, author = {Phong Nguyen and Melanie Hilario and Alexandros Kalousis}, url = {https://doi.org/10.1613/jair.4377}, doi = {10.1613/jair.4377}, year = {2014}, date = {2014-01-01}, journal = {J. Artif. Intell. Res.}, volume = {51}, pages = {605--644}, abstract = {Knowledge Discovery in Databases is a complex process that involves many different data processing and learning operators. Today's Knowledge Discovery Support Systems can contain several hundred operators. A major challenge is to assist the user in designing workflows which are not only valid but also -- ideally -- optimize some performance measure associated with the user goal. In this paper we present such a system. The system relies on a meta-mining module which analyses past data mining experiments and extracts meta-mining models which associate dataset characteristics with workflow descriptors in view of workflow performance optimization. The meta-mining model is used within a data mining workflow planner, to guide the planner during the workflow planning. We learn the meta-mining models using a similarity learning approach, and extract the workflow descriptors by mining the workflows for generalized relational patterns accounting also for domain knowledge provided by a data mining ontology. We evaluate the quality of the data mining workflows that the system produces on a collection of real world datasets coming from biology and show that it produces workflows that are significantly better than alternative methods that can only do workflow selection and not planning.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Knowledge Discovery in Databases is a complex process that involves many different data processing and learning operators. Today's Knowledge Discovery Support Systems can contain several hundred operators. A major challenge is to assist the user in designing workflows which are not only valid but also -- ideally -- optimize some performance measure associated with the user goal. In this paper we present such a system. The system relies on a meta-mining module which analyses past data mining experiments and extracts meta-mining models which associate dataset characteristics with workflow descriptors in view of workflow performance optimization. The meta-mining model is used within a data mining workflow planner, to guide the planner during the workflow planning. We learn the meta-mining models using a similarity learning approach, and extract the workflow descriptors by mining the workflows for generalized relational patterns accounting also for domain knowledge provided by a data mining ontology. We evaluate the quality of the data mining workflows that the system produces on a collection of real world datasets coming from biology and show that it produces workflows that are significantly better than alternative methods that can only do workflow selection and not planning. |
Wang, Jun; Sun, Ke; Sha, Fei; Marchand-Maillet, Stéphane; Kalousis, Alexandros Two-Stage Metric Learning Inproceedings Proceedings of the 31th International Conference on Machine Learning, ICML 2014, Beijing, China, 21-26 June 2014, pp. 370–378, 2014. @inproceedings{DBLP:conf/icml/WangSSMK14, title = {Two-Stage Metric Learning}, author = {Jun Wang and Ke Sun and Fei Sha and Stéphane Marchand-Maillet and Alexandros Kalousis}, url = {http://jmlr.org/proceedings/papers/v32/wangc14.html}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings of the 31th International Conference on Machine Learning, ICML 2014, Beijing, China, 21-26 June 2014}, pages = {370--378}, crossref = {DBLP:conf/icml/2014}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2013 |
Do, Huyen; Kalousis, Alexandros Convex formulations of radius-margin based Support Vector Machines Inproceedings Proceedings of the 30th International Conference on Machine Learning, ICML 2013, Atlanta, GA, USA, 16-21 June 2013, pp. 169–177, 2013. @inproceedings{DBLP:conf/icml/DoK13, title = {Convex formulations of radius-margin based Support Vector Machines}, author = {Huyen Do and Alexandros Kalousis}, url = {http://jmlr.org/proceedings/papers/v28/do13.html}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the 30th International Conference on Machine Learning, ICML 2013, Atlanta, GA, USA, 16-21 June 2013}, pages = {169--177}, crossref = {DBLP:conf/icml/2013}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2012 |
Do, Huyen A unified framework for Support Vector Machines, Multiple Kernel Learning and metric learning PhD Thesis University of Geneva, Computer Science Department, 2012, (ID: unige:24004). @phdthesis{DoPhD2012, title = {A unified framework for Support Vector Machines, Multiple Kernel Learning and metric learning}, author = {Huyen Do}, url = {http://nbn-resolving.de/urn:nbn:ch:unige-240049}, year = {2012}, date = {2012-10-01}, school = {University of Geneva, Computer Science Department}, abstract = {In this work we studied several families of learning algorithms, including Support Vector Machines, Multiple Kernel Learning and metric learning. We focused on two fundamental tasks in Machine Learning which are classification and feature selection. We derived novel learning algorithms for classification and for feature selection, which outperformed the state-of-the-art in several experiments. Moreover, we discovered the relationship among algorithms and built a unified framework of various learning algorithms, more precisely, a unified framework of Support Vector Machines, Multiple Kernel Learning and metric learning.}, note = {ID: unige:24004}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } In this work we studied several families of learning algorithms, including Support Vector Machines, Multiple Kernel Learning and metric learning. We focused on two fundamental tasks in Machine Learning which are classification and feature selection. We derived novel learning algorithms for classification and for feature selection, which outperformed the state-of-the-art in several experiments. Moreover, we discovered the relationship among algorithms and built a unified framework of various learning algorithms, more precisely, a unified framework of Support Vector Machines, Multiple Kernel Learning and metric learning. |
Nguyen, Phong; Wang, Jun; Hilario, Melanie; Kalousis, Alexandros Learning Heterogeneous Similarity Measures for Hybrid-Recommendations in Meta-Mining Inproceedings 12th IEEE International Conference on Data Mining, ICDM 2012, Brussels, Belgium, December 10-13, 2012, pp. 1026–1031, 2012. @inproceedings{DBLP:conf/icdm/NguyenWHK12, title = {Learning Heterogeneous Similarity Measures for Hybrid-Recommendations in Meta-Mining}, author = {Phong Nguyen and Jun Wang and Melanie Hilario and Alexandros Kalousis}, url = {https://doi.org/10.1109/ICDM.2012.41}, doi = {10.1109/ICDM.2012.41}, year = {2012}, date = {2012-01-01}, booktitle = {12th IEEE International Conference on Data Mining, ICDM 2012, Brussels, Belgium, December 10-13, 2012}, pages = {1026--1031}, crossref = {DBLP:conf/icdm/2012}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Woznica, Adam; Nguyen, Phong; Kalousis, Alexandros Model mining for robust feature selection Inproceedings The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '12, Beijing, China, August 12-16, 2012, pp. 913–921, 2012. @inproceedings{DBLP:conf/kdd/WoznicaNK12, title = {Model mining for robust feature selection}, author = {Adam Woznica and Phong Nguyen and Alexandros Kalousis}, url = {http://doi.acm.org/10.1145/2339530.2339674}, doi = {10.1145/2339530.2339674}, year = {2012}, date = {2012-01-01}, booktitle = {The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '12, Beijing, China, August 12-16, 2012}, pages = {913--921}, crossref = {DBLP:conf/kdd/2012}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Wang, Jun; Kalousis, Alexandros; Woznica, Adam Parametric Local Metric Learning for Nearest Neighbor Classification Inproceedings Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States., pp. 1610–1618, 2012. @inproceedings{DBLP:conf/nips/WangKW12, title = {Parametric Local Metric Learning for Nearest Neighbor Classification}, author = {Jun Wang and Alexandros Kalousis and Adam Woznica}, url = {http://papers.nips.cc/paper/4818-parametric-local-metric-learning-for-nearest-neighbor-classification}, year = {2012}, date = {2012-01-01}, booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States.}, pages = {1610--1618}, crossref = {DBLP:conf/nips/2012}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Wang, Jun; Woznica, Adam; Kalousis, Alexandros Learning Neighborhoods for Metric Learning Inproceedings Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2012, Bristol, UK, September 24-28, 2012. Proceedings, Part I, pp. 223–236, 2012. @inproceedings{DBLP:conf/pkdd/WangWK12, title = {Learning Neighborhoods for Metric Learning}, author = {Jun Wang and Adam Woznica and Alexandros Kalousis}, url = {https://doi.org/10.1007/978-3-642-33460-3_20}, doi = {10.1007/978-3-642-33460-3_20}, year = {2012}, date = {2012-01-01}, booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2012, Bristol, UK, September 24-28, 2012. Proceedings, Part I}, pages = {223--236}, crossref = {DBLP:conf/pkdd/2012-1}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Do, Huyen; Kalousis, Alexandros; Wang, Jun; Woznica, Adam A metric learning perspective of SVM: on the relation of LMNN and SVM Inproceedings Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, AISTATS 2012, La Palma, Canary Islands, April 21-23, 2012, pp. 308–317, 2012. @inproceedings{DBLP:journals/jmlr/DoKWW12, title = {A metric learning perspective of SVM: on the relation of LMNN and SVM}, author = {Huyen Do and Alexandros Kalousis and Jun Wang and Adam Woznica}, url = {http://jmlr.csail.mit.edu/proceedings/papers/v22/do12.html}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, AISTATS 2012, La Palma, Canary Islands, April 21-23, 2012}, pages = {308--317}, crossref = {DBLP:conf/aistats/2012}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Stendardo, Nabil; Kalousis, Alexandros Relationship-aware sequential pattern mining Journal Article CoRR, abs/1212.5389 , 2012. @article{DBLP:journals/corr/abs-1212-5389, title = {Relationship-aware sequential pattern mining}, author = {Nabil Stendardo and Alexandros Kalousis}, url = {http://arxiv.org/abs/1212.5389}, year = {2012}, date = {2012-01-01}, journal = {CoRR}, volume = {abs/1212.5389}, keywords = {}, pubstate = {published}, tppubtype = {article} } |