2020
Alayrac, Jean-Baptiste; Recasens, Adrià; Schneider, Rosalia; Arandjelović, Relja; Ramapuram, Jason; De Fauw, Jeffrey; Smaira, Lucas; Dieleman, Sander; Zisserman, Andrew: Self-Supervised MultiModal Versatile Networks. Unpublished, 2020.

@unpublished{Alayrac2020,
title = {Self-Supervised MultiModal Versatile Networks},
author = {Jean-Baptiste Alayrac and Adrià Recasens and Rosalia Schneider and Relja Arandjelović and Jason Ramapuram and Jeffrey De Fauw and Lucas Smaira and Sander Dieleman and Andrew Zisserman},
url = {https://arxiv.org/abs/2006.16228},
year = {2020},
date = {2020-06-29},
abstract = {Videos are a rich source of multi-modal supervision. In this work, we learn representations using self-supervision by leveraging three modalities naturally present in videos: vision, audio and language. To this end, we introduce the notion of a multimodal versatile network -- a network that can ingest multiple modalities and whose representations enable downstream tasks in multiple modalities. In particular, we explore how best to combine the modalities, such that fine-grained representations of audio and vision can be maintained, whilst also integrating text into a common embedding. Driven by versatility, we also introduce a novel process of deflation, so that the networks can be effortlessly applied to the visual data in the form of video or a static image. We demonstrate how such networks trained on large collections of unlabelled video data can be applied on video, video-text, image and audio tasks. Equipped with these representations, we obtain state-of-the-art performance on multiple challenging benchmarks including UCF101, HMDB51 and ESC-50 when compared to previous self-supervised work.
},
keywords = {},
pubstate = {published},
tppubtype = {unpublished}
}
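The core of the approach is contrastive alignment of the three modalities in shared embedding spaces. As a rough PyTorch illustration, the sketch below pairs a fine-grained vision-audio space with a coarser vision-text space; the encoder outputs, dimensions, and loss pairing are illustrative assumptions, not the authors' implementation.

import torch
import torch.nn.functional as F

def nce_loss(x, y, temperature=0.07):
    # Symmetric InfoNCE between two batches of embeddings of shape (B, D):
    # row i of x and row i of y are positives, all other rows are negatives.
    x = F.normalize(x, dim=-1)
    y = F.normalize(y, dim=-1)
    logits = x @ y.t() / temperature
    targets = torch.arange(x.size(0))
    return 0.5 * (F.cross_entropy(logits, targets)
                  + F.cross_entropy(logits.t(), targets))

B, D = 8, 128                                       # toy batch and dimension
video_emb = torch.randn(B, D, requires_grad=True)   # stand-in for a video backbone
audio_emb = torch.randn(B, D, requires_grad=True)   # stand-in for an audio backbone
text_emb = torch.randn(B, D, requires_grad=True)    # stand-in for a text encoder

# Fine-grained vision-audio alignment plus coarser vision-text alignment,
# loosely mirroring the fine-and-coarse space design described above.
loss = nce_loss(video_emb, audio_emb) + nce_loss(video_emb, text_emb)
loss.backward()                                     # gradients reach all three encoders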
Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros: Lifelong generative modeling. Journal Article, Neurocomputing, 404, pp. 381-400, 2020, ISSN: 0925-2312. (Code: https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling)

@article{RAMAPURAM2020381,
title = {Lifelong generative modeling},
author = {Jason Ramapuram and Magda Gregorova and Alexandros Kalousis},
url = {http://www.sciencedirect.com/science/article/pii/S0925231220303623
https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling},
doi = {10.1016/j.neucom.2020.02.115},
issn = {0925-2312},
year = {2020},
date = {2020-01-01},
journal = {Neurocomputing},
volume = {404},
pages = {381--400},
abstract = {Lifelong learning is the problem of learning multiple consecutive tasks in a sequential manner, where knowledge gained from previous tasks is retained and used to aid future learning over the lifetime of the learner. It is essential towards the development of intelligent machines that can adapt to their surroundings. In this work we focus on a lifelong learning approach to unsupervised generative modeling, where we continuously incorporate newly observed distributions into a learned model. We do so through a student-teacher Variational Autoencoder architecture which allows us to learn and preserve all the distributions seen so far, without the need to retain the past data nor the past models. Through the introduction of a novel cross-model regularizer, inspired by a Bayesian update rule, the student model leverages the information learned by the teacher, which acts as a probabilistic knowledge store. The regularizer reduces the effect of catastrophic interference that appears when we learn over sequences of distributions. We validate our model’s performance on sequential variants of MNIST, FashionMNIST, PermutedMNIST, SVHN and Celeb-A and demonstrate that our model mitigates the effects of catastrophic interference faced by neural networks in sequential learning scenarios.},
note = {Code: https://bitbucket.org/dmmlgeneva/lifelonggenerativemodeling},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
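To make the student-teacher mechanism concrete, here is a hedged PyTorch sketch of one training step. The `student` and `teacher` VAE modules are hypothetical (assumed to expose `decode`, a `latent_dim` attribute, and a forward returning `(recon, mu, logvar)`), and the cross-model term is a simplified Gaussian KL, not the paper's exact regularizer; see the linked repository for the authors' code.

import torch
import torch.nn.functional as F

def vae_elbo(x, recon, mu, logvar):
    # Standard negative ELBO: reconstruction term plus KL to the N(0, I) prior.
    # Assumes inputs and reconstructions lie in [0, 1].
    rec = F.binary_cross_entropy(recon, x, reduction='sum')
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return rec + kl

def lifelong_step(student, teacher, x_new, replay_ratio=0.5):
    # The frozen teacher acts as a probabilistic knowledge store: sample
    # "replay" data from it so past distributions are preserved without
    # retaining any past data or past models.
    n_replay = int(replay_ratio * x_new.size(0))
    with torch.no_grad():
        x_old = teacher.decode(torch.randn(n_replay, teacher.latent_dim))
    x = torch.cat([x_new, x_old], dim=0)

    recon, mu_s, logvar_s = student(x)
    loss = vae_elbo(x, recon, mu_s, logvar_s)

    # Cross-model regularizer (simplified assumption): keep the student's
    # Gaussian posterior close to the teacher's on the replayed data.
    with torch.no_grad():
        _, mu_t, logvar_t = teacher(x_old)
    mu_so, logvar_so = mu_s[-n_replay:], logvar_s[-n_replay:]
    kl_cross = 0.5 * torch.sum(
        logvar_t - logvar_so
        + (logvar_so.exp() + (mu_so - mu_t).pow(2)) / logvar_t.exp()
        - 1.0)
    return loss + kl_cross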
2019
Ramapuram, Jason; Webb, Russ: Differentiable Approximation Bridges For Training Networks Containing Non-Differentiable Functions. Conference, IEEE WCCI 2020, IEEE, 2019. (Code: http://github.com/apple/ml-dab)

@conference{Ramapuram2019,
title = {Differentiable Approximation Bridges For Training Networks Containing Non-Differentiable Functions.},
author = {Jason Ramapuram and Russ Webb},
url = {https://arxiv.org/abs/1905.03658
http://github.com/apple/ml-dab},
year = {2019},
date = {2019-07-19},
booktitle = {IEEE WCCI 2020},
organization = {IEEE},
abstract = {Modern neural network training relies on piece-wise (sub-)differentiable functions in order to use backpropagation for efficient calculation of gradients. In this work, we introduce a novel method to allow for non-differentiable functions at intermediary layers of deep neural networks. We do so through the introduction of a differentiable approximation bridge (DAB) neural network which provides smooth approximations to the gradient of the non-differentiable function. We present strong empirical results (performing over 600 experiments) in three different domains: unsupervised (image) representation learning, image classification, and sequence sorting to demonstrate that our proposed method improves state-of-the-art performance. We demonstrate that utilizing non-differentiable functions in unsupervised (image) representation learning improves reconstruction quality and posterior linear separability by 10%. We also observe an accuracy improvement of 77% in neural sequence sorting and a 25% improvement against the straight-through estimator [3] in an image classification setting with the sort non-linearity. This work enables the usage of functions that were previously not usable in neural networks.},
note = {Code: http://github.com/apple/ml-dab},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
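As a rough PyTorch illustration of the idea, the sketch below places a hard, non-differentiable function in the forward pass while routing gradients through a small learned bridge network trained to mimic it. The architecture and auxiliary objective are simplified assumptions, not the released implementation (see http://github.com/apple/ml-dab).

import torch
import torch.nn as nn

class DAB(nn.Module):
    def __init__(self, hard_fn, dim):
        super().__init__()
        self.hard_fn = hard_fn                          # the non-differentiable op
        self.bridge = nn.Sequential(                    # smooth learned approximator
            nn.Linear(dim, dim), nn.Tanh(), nn.Linear(dim, dim))

    def forward(self, x):
        soft = self.bridge(x)
        with torch.no_grad():
            hard = self.hard_fn(x)
        # Forward value equals hard_fn(x); gradients flow through the bridge
        # (a straight-through-style substitution with a learned proxy).
        out = soft + (hard - soft).detach()
        # Auxiliary regression loss pulls the bridge toward the hard function.
        self.aux_loss = (soft - hard).pow(2).mean()
        return out

dab = DAB(torch.sign, dim=16)                   # sign() has zero gradient a.e.
x = torch.randn(4, 16, requires_grad=True)
y = dab(x)                                      # y equals torch.sign(x) in value
(y.sum() + dab.aux_loss).backward()             # gradients reach x via the bridge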
2018
Ramapuram, Jason; Lavda, Frantzeska; Webb, Russ; Kalousis, Alexandros; Diephuis, Maurits: Variational Saccading: Efficient Inference for Large Resolution Images. Conference, BMVC 2019 & Bayesian Deep Learning Workshop, NeurIPS, 2018. (Code: https://github.com/jramapuram/variational_saccading)

@conference{Ramapuram2018,
title = {Variational Saccading: Efficient Inference for Large Resolution Images},
author = {Jason Ramapuram and Frantzeska Lavda and Russ Webb and Alexandros Kalousis and Maurits Diephuis},
url = {https://arxiv.org/abs/1812.03170
https://github.com/jramapuram/variational_saccading},
year = {2018},
date = {2018-12-03},
booktitle = {BMVC 2019 & Bayesian Deep Learning Workshop NeurIPS},
journal = {Bayesian Deep Learning Workshop NeurIPS 2018},
abstract = {Image classification with deep neural networks is typically restricted to images of small dimensionality such as 224x224 in ResNet models. This limitation excludes the 4000x3000 dimensional images that are taken by modern smartphone cameras and smart devices. In this work, we aim to mitigate the prohibitive inferential and memory costs of operating in such large dimensional spaces. To sample from the high-resolution original input distribution, we propose using a smaller proxy distribution to learn the co-ordinates that correspond to regions of interest in the high-dimensional space. We introduce a new principled variational lower bound that captures the relationship of the proxy distribution's posterior and the original image's co-ordinate space in a way that maximizes the conditional classification likelihood. We empirically demonstrate on one synthetic benchmark and one real world large resolution DSLR camera image dataset that our method produces comparable results with 10x faster inference and lower memory consumption than a model that utilizes the entire original input distribution.},
note = {Code: https://github.com/jramapuram/variational_saccading},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
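A minimal sketch of the crop-at-predicted-coordinates mechanic follows, assuming a hypothetical coordinate head and using grid_sample so gradients flow back into the coordinates; the variational bound itself is omitted (see the linked repository for the authors' code).

import torch
import torch.nn.functional as F

def saccade_crop(full_img, center, crop=64):
    # Differentiably read a crop x crop window from the full-resolution image
    # around a center given in (-1, 1) normalized coordinates via grid_sample.
    B, C, H, W = full_img.shape
    ly = torch.linspace(-crop / H, crop / H, crop)
    lx = torch.linspace(-crop / W, crop / W, crop)
    gy, gx = torch.meshgrid(ly, lx, indexing='ij')
    grid = torch.stack([gx, gy], dim=-1).unsqueeze(0).expand(B, -1, -1, -1)
    grid = grid + center.view(B, 1, 1, 2)
    return F.grid_sample(full_img, grid, align_corners=False)

B = 2
full = torch.rand(B, 3, 1500, 2000)           # stand-in for a large camera image
proxy = F.interpolate(full, size=(32, 32))    # cheap, downsampled proxy input
coords = torch.tanh(torch.randn(B, 2, requires_grad=True))  # stand-in for predicted centers
patch = saccade_crop(full, coords)            # only a 64x64 region is processed
patch.mean().backward()                       # gradients reach the coordinates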
Lavda, Frantzeska; Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros: Continual Classification Learning Using Generative Models. Workshop, Continual Learning Workshop, NeurIPS 2018, 2018.

@workshop{DBLP:journals/corr/abs-1810-10612,
title = {Continual Classification Learning Using Generative Models},
author = {Frantzeska Lavda and Jason Ramapuram and Magda Gregorova and Alexandros Kalousis},
url = {http://arxiv.org/abs/1810.10612},
year = {2018},
date = {2018-10-24},
booktitle = {Continual Learning Workshop NeurIPS 2018},
journal = {CoRR},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
Gregorová, Magda; Ramapuram, Jason; Kalousis, Alexandros; Marchand-Maillet, Stéphane: Large-Scale Nonlinear Variable Selection via Kernel Random Features. Inproceedings, Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2018, Dublin, Ireland, September 10-14, 2018, Proceedings, Part II, pp. 177–192, 2018.

@inproceedings{DBLP:conf/pkdd/GregorovaRKM18,
title = {Large-Scale Nonlinear Variable Selection via Kernel Random Features},
author = {Magda Gregorová and Jason Ramapuram and Alexandros Kalousis and Stéphane Marchand-Maillet},
url = {https://doi.org/10.1007/978-3-030-10928-8_11},
doi = {10.1007/978-3-030-10928-8_11},
year = {2018},
date = {2018-01-01},
booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2018, Dublin, Ireland, September 10-14, 2018, Proceedings, Part II},
pages = {177--192},
crossref = {DBLP:conf/pkdd/2018-2},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ramapuram, Jason; Webb, Russ: A New Benchmark and Progress Toward Improved Weakly Supervised Learning. Journal Article, BMVC 2018, abs/1807.00126, 2018. (Code: https://github.com/apple/ml-all-pairs)

@article{DBLP:journals/corr/abs-1807-00126,
title = {A New Benchmark and Progress Toward Improved Weakly Supervised Learning},
author = {Jason Ramapuram and Russ Webb},
url = {http://arxiv.org/abs/1807.00126
https://github.com/apple/ml-all-pairs},
year = {2018},
date = {2018-01-01},
journal = {BMVC 2018},
volume = {abs/1807.00126},
note = {Code: https://github.com/apple/ml-all-pairs},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2017
Ramapuram, Jason; Gregorova, Magda; Kalousis, Alexandros: Lifelong Generative Modeling. Journal Article, Elsevier Neurocomputing 2020, abs/1705.09847, 2017.

@article{DBLP:journals/corr/RamapuramGK17,
title = {Lifelong Generative Modeling},
author = {Jason Ramapuram and Magda Gregorova and Alexandros Kalousis},
url = {http://arxiv.org/abs/1705.09847},
year = {2017},
date = {2017-01-01},
journal = {Elsevier Neurocomputing 2020},
volume = {abs/1705.09847},
keywords = {},
pubstate = {published},
tppubtype = {article}
}