Lu, Yen-Cheng; Wu, Chih-Wei; Lu, Chang-Tien; Lerch, Alexander: An Unsupervised Approach to Anomaly Detection in Music Datasets. Proceedings Article. In: Proceedings of the ACM SIGIR Conference (SIGIR), pp. 749–752, ACM, Pisa, 2016, ISBN: 978-1-4503-4069-4. Abstract | Links | BibTeX | Tags: anomaly detection, data clean-up, music genre retrieval, music information retrieval
Lu, Yen-Cheng; Wu, Chih-Wei; Lu, Chang-Tien; Lerch, Alexander: Automatic Outlier Detection in Music Genre Datasets. Proceedings Article. In: Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), ISMIR, New York, 2016. Abstract | Links | BibTeX | Tags: anomaly detection, data clean-up, music genre retrieval, music information retrieval
2016
@comment{
  Lu, Wu, Lu, Lerch: SIGIR 2016 short paper on unsupervised anomaly detection
  in music datasets. Only the year is recorded; the former placeholder
  date 2016-01-01 was dropped so that biblatex styles do not print a
  fabricated day and month. The address field holds the conference city as
  exported, not the publisher's city.
}
@inproceedings{lu_unsupervised_2016,
  author     = {Lu, Yen-Cheng and Wu, Chih-Wei and Lu, Chang-Tien and Lerch, Alexander},
  title      = {An Unsupervised Approach to Anomaly Detection in Music Datasets},
  booktitle  = {Proceedings of the ACM SIGIR Conference (SIGIR)},
  series     = {SIGIR '16},
  pages      = {749--752},
  publisher  = {ACM},
  address    = {Pisa},
  year       = {2016},
  isbn       = {978-1-4503-4069-4},
  doi        = {10.1145/2911451.2914700},
  url        = {http://www.musicinformatics.gatech.edu/wp-content_nondefault/uploads/2016/07/Lu-et-al_2016_An-Unsupervised-Approach-to-Anomaly-Detection-in-Music-Datasets.pdf},
  abstract   = {This paper presents an unsupervised method for systematically identifying anomalies in music datasets. The model integrates categorical regression and robust estimation techniques to infer anomalous scores in music clips. When applied to a music genre recognition dataset, the new method is able to detect corrupted, distorted, or mislabeled audio samples based on commonly used features in music information retrieval. The evaluation results show that the algorithm outperforms other anomaly detection methods and is capable of finding problematic samples identified by human experts. The proposed method introduces a preliminary framework for anomaly detection in music data that can serve as a useful tool to improve data integrity in the future.},
  keywords   = {anomaly detection, data clean-up, music genre retrieval, music information retrieval},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
@comment{
  Lu, Wu, Lu, Lerch: ISMIR 2016 paper comparing outlier detection algorithms
  on a music genre recognition dataset. The abstract was repaired after PDF
  extraction damage: missing spaces, end-of-line hyphenation and hard line
  breaks. Only the year is recorded; the former placeholder date 2016-01-01
  was dropped so that biblatex styles do not print a fabricated day and month.
}
@inproceedings{lu_automatic_2016,
  author     = {Lu, Yen-Cheng and Wu, Chih-Wei and Lu, Chang-Tien and Lerch, Alexander},
  title      = {Automatic Outlier Detection in Music Genre Datasets},
  booktitle  = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR)},
  series     = {ISMIR},
  publisher  = {ISMIR},
  address    = {New York},
  year       = {2016},
  url        = {http://www.musicinformatics.gatech.edu/wp-content_nondefault/uploads/2016/07/Lu-et-al_2016_Automatic-Outlier-Detection-in-Music-Genre-Datasets.pdf},
  abstract   = {Outlier detection, also known as anomaly detection, is an important topic that has been studied for decades. An outlier detection system is able to identify anomalies in a dataset and thus improve data integrity by removing the detected outliers. It has been successfully applied to different types of data in various fields such as cyber-security, finance, and transportation. In the field of Music Information Retrieval (MIR), however, the number of related studies is small. In this paper, we introduce different state-of-the-art outlier detection techniques and evaluate their viability in the context of music datasets. More specifically, we present a comparative study of 6 outlier detection algorithms applied to a Music Genre Recognition (MGR) dataset. It is determined how well algorithms can identify mislabeled or corrupted files, and how much the quality of the dataset can be improved. Results indicate that state-of-the-art anomaly detection systems have problems identifying anomalies in MGR datasets reliably.},
  keywords   = {anomaly detection, data clean-up, music genre retrieval, music information retrieval},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
important topic that has been studied for decades. An outlier
detection system is able to identify anomalies in a dataset
and thus improve data integrity by removing the detected
outliers. It has been successfully applied to different types
of data in various fields such as cyber-security, finance,
and transportation. In the field of Music Information
Retrieval (MIR), however, the number of related studies is
small. In this paper, we introduce different state-of-the-art
outlier detection techniques and evaluate their viability in
the context of music datasets. More specifically, we present
a comparative study of 6 outlier detection algorithms
applied to a Music Genre Recognition (MGR) dataset. It is
determined how well algorithms can identify mislabeled or
corrupted files, and how much the quality of the dataset can
be improved. Results indicate that state-of-the-art anomaly
detection systems have problems identifying anomalies in
MGR datasets reliably.
publications
An Unsupervised Approach to Anomaly Detection in Music Datasets. Proceedings Article. In: Proceedings of the ACM SIGIR Conference (SIGIR), pp. 749–752, ACM, Pisa, 2016, ISBN: 978-1-4503-4069-4.
Automatic Outlier Detection in Music Genre Datasets. Proceedings Article. In: Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), ISMIR, New York, 2016.
2016