@comment{
  Conference paper (ICASSP 2015 proceedings).
  Authors are entered in "Last, First" form so that the two-word surname
  "Van Hamme" is kept whole instead of being split into a given name "Hugo Van"
  and a surname "Hamme".
  NOTE(review): the month value (aug) is carried over from the exported record;
  confirm against the proceedings whether it is the conference month or the
  online-publication month.
}
@inproceedings{6daacbf921394c6087a3fac5a412a47a,
  author    = {Baby, Deepak and Gemmeke, Jort and Virtanen, Tuomas and Van Hamme, Hugo},
  title     = {Exemplar-based speech enhancement for deep neural network based automatic speech recognition},
  booktitle = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  year      = {2015},
  month     = aug,
  pages     = {4485--4489},
  publisher = {The Institute of Electrical and Electronics Engineers, Inc.},
  doi       = {10.1109/ICASSP.2015.7178819},
  isbn      = {9781467369978},
  keywords  = {coupled dictionaries; deep neural networks; modulation envelope; non-negative matrix factorisation; speech enhancement},
  abstract  = {Deep neural network (DNN) based acoustic modelling has been successfully used for a variety of automatic speech recognition (ASR) tasks, thanks to its ability to learn higher-level information using multiple hidden layers. This paper investigates the recently proposed exemplar-based speech enhancement technique using coupled dictionaries as a pre-processing stage for DNN-based systems. In this setting, the noisy speech is decomposed as a weighted sum of atoms in an input dictionary containing exemplars sampled from a domain of choice, and the resulting weights are applied to a coupled output dictionary containing exemplars sampled in the short-time Fourier transform (STFT) domain to directly obtain the speech and noise estimates for speech enhancement. In this work, settings using input dictionary of exemplars sampled from the STFT, Mel-integrated magnitude STFT and modulation envelope spectra are evaluated. Experiments performed on the AURORA-4 database revealed that these pre-processing stages can improve the performance of the DNN-HMM-based ASR systems with both clean and multi-condition training.},
}