Ding, Yiwei; Lerch, Alexander: Audio Embeddings as Teachers for Music Classification. Proceedings Article. In: Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Milan, Italy, 2023. Abstract | Links | BibTeX | Tags: Computer Science - Information Retrieval, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing
Watcharasupat, Karn N; Lerch, Alexander: Evaluation of Latent Space Disentanglement in the Presence of Interdependent Attributes. Proceedings Article. In: Late Breaking Demo (Extended Abstract), Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Online, 2021. Abstract | Links | BibTeX | Tags: Computer Science - Information Retrieval, Computer Science - Information Theory, Computer Science - Machine Learning, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing
2023
% ISMIR 2023 conference paper; the arXiv identifier 2306.17424 is taken from the url/doi fields.
@inproceedings{ding_audio_2023,
  author     = {Ding, Yiwei and Lerch, Alexander},
  title      = {Audio Embeddings as Teachers for Music Classification},
  booktitle  = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  address    = {Milan, Italy},
  year       = {2023},
  date       = {2023-06-01},
  doi        = {10.48550/arXiv.2306.17424},
  eprint     = {2306.17424},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2306.17424},
  urldate    = {2023-06-01},
  abstract   = {Music classification has been one of the most popular tasks in the field of music information retrieval. With the development of deep learning models, the last decade has seen impressive improvements in a wide range of classification tasks. However, the increasing model complexity makes both training and inference computationally expensive. In this paper, we integrate the ideas of transfer learning and feature-based knowledge distillation and systematically investigate using pre-trained audio embeddings as teachers to guide the training of low-complexity student networks. By regularizing the feature space of the student networks with the pre-trained embeddings, the knowledge in the teacher embeddings can be transferred to the students. We use various pre-trained audio embeddings and test the effectiveness of the method on the tasks of musical instrument classification and music auto-tagging. Results show that our method significantly improves the results in comparison to the identical model trained without the teacher's knowledge. This technique can also be combined with classical knowledge distillation approaches to further improve the model's performance.},
  keywords   = {Computer Science - Information Retrieval, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2021
% ISMIR 2021 late-breaking demo (extended abstract); the arXiv identifier 2110.05587 is taken from the url field.
@inproceedings{watcharasupat_evaluation_2021,
  author     = {Watcharasupat, Karn N. and Lerch, Alexander},
  title      = {Evaluation of Latent Space Disentanglement in the Presence of Interdependent Attributes},
  booktitle  = {Late Breaking Demo (Extended Abstract), Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  address    = {Online},
  year       = {2021},
  date       = {2021-10-01},
  doi        = {10.48550/arXiv.2110.05587},
  eprint     = {2110.05587},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2110.05587},
  urldate    = {2021-11-11},
  abstract   = {Controllable music generation with deep generative models has become increasingly reliant on disentanglement learning techniques. However, current disentanglement metrics, such as mutual information gap (MIG), are often inadequate and misleading when used for evaluating latent representations in the presence of interdependent semantic attributes often encountered in real-world music datasets. In this work, we propose a dependency-aware information metric as a drop-in replacement for MIG that accounts for the inherent relationship between semantic attributes.},
  keywords   = {Computer Science - Information Retrieval, Computer Science - Information Theory, Computer Science - Machine Learning, Computer Science - Sound, Electrical Engineering and Systems Science - Audio and Speech Processing},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
publications
Audio Embeddings as Teachers for Music Classification. Proceedings Article. In: Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Milan, Italy, 2023.
Evaluation of Latent Space Disentanglement in the Presence of Interdependent Attributes. Proceedings Article. In: Late Breaking Demo (Extended Abstract), Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Online, 2021.
2023
2021