% EUSIPCO 2012 conference paper by Hurmalainen, Gemmeke and Virtanen.
% The title is stored in plain Title Case; the bibliography style decides the final casing.
@inproceedings{Hurmalainen_EUSIPCO_2012,
  author       = {Hurmalainen, Antti and Gemmeke, Jort and Virtanen, Tuomas},
  title        = {Detection, Separation and Recognition of Speech From Continuous Signals Using Spectral Factorisation},
  booktitle    = {20th European Signal Processing Conference ({EUSIPCO})},
  organization = {European Association for Signal, Speech, and Image Processing ({EURASIP})},
  address      = {Bucharest, Romania},
  month        = aug,
  year         = {2012},
  pages        = {2649--2653},
  keywords     = {Spectral factorization;speech recognition;speaker recognition;voice activity detection;speech separation},
  abstract     = {In real world speech processing, the signals are often continuous and consist of momentary segments of speech over non-stationary background noise. It has been demonstrated that spectral factorisation using multi-frame atoms can be successfully employed to separate and recognise speech in adverse conditions. While in previous work full knowledge of utterance endpointing and speaker identity was used for noise modelling and speech recognition, this study proposes spectral factorisation and sparse classification techniques to detect, identify, separate and recognise speech from a continuous noisy input. Speech models are trained beforehand, but noise models are acquired adaptively from the input by using voice activity detection without prior knowledge of noise-only locations. The results are evaluated on the CHiME corpus, containing utterances from 34 speakers over highly non-stationary multi-source noise.},
}