% ICASSP 2017 conference paper: sound event detection from multichannel
% audio using spatial features and a convolutional recurrent neural network.
% Key note: the previous key "Pasi Pertila2017" contained a space, which
% BibTeX rejects in citation keys; cite this entry with the key below.
@inproceedings{Adavanne2017sound,
  author    = {Adavanne, Sharath and Pertil{\"a}, Pasi and Virtanen, Tuomas},
  title     = {Sound Event Detection Using Spatial Features and Convolutional Recurrent Neural Network},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2017)},
  year      = {2017},
  url       = {https://www.cs.tut.fi/~adavanne/papers/Adavanne_ICASSP2017.pdf},
  keywords  = {Sound event detection, multichannel audio, spatial features, convolutional recurrent neural network},
  abstract  = {This paper proposes to use low-level spatial features extracted
from multichannel audio for sound event detection.
We extend the convolutional recurrent neural network to handle
more than one type of these multichannel features by
learning from each of them separately in the initial stages.
We show that instead of concatenating the features of each
channel into a single feature vector the network learns sound
events in multichannel audio better when they are presented
as separate layers of a volume. Using the proposed spatial
features over monaural features on the same network gives
an absolute F-score improvement of 6.1\% on the publicly
available TUT-SED 2016 dataset and 2.7\% on the TUT-SED
2009 dataset that is fifteen times larger.},
}