% CP-JKU team technical report for DCASE-2016 Task 1 (audio scene classification).
% The month uses the predefined macro `sep` so the bibliography style controls its rendering.
@techreport{Eghbal-Zadeh2016,
    author      = {Eghbal-Zadeh, Hamid and Lehner, Bernhard and Dorfer, Matthias and Widmer, Gerhard},
    title       = {{CP-JKU} Submissions for {DCASE}-2016: a Hybrid Approach Using Binaural I-Vectors and Deep Convolutional Neural Networks},
    abstract    = {This report describes the 4 submissions for Task 1 (Audio scene classification) of the DCASE-2016 challenge of the CP-JKU team. We propose 4 different approaches for Audio Scene Classification (ASC). First, we propose a novel i-vector extraction scheme for ASC using both left and right audio channels. Second, we propose a Deep Convolutional Neural Network (DCNN) architecture trained on spectrograms of audio excerpts in end-to-end fashion. Third, we use a calibration transformation to improve the performance of our binaural i-vector system. Finally, we propose a late-fusion of our binaural i-vector and the DCNN. We report the performance of our proposed methods on the provided cross-validation setup for the DCASE-2016 challenge. Using the late-fusion approach, we improve the performance of the baseline by 17% in accuracy.},
    institution = {DCASE2016 Challenge},
    month       = sep,
    year        = {2016}
}