@comment{CP-JKU technical report for the DCASE-2016 challenge, Task 1 (audio scene classification).}
@techreport{Eghbal-Zadeh2016,
  author      = {Eghbal-Zadeh, Hamid and Lehner, Bernhard and Dorfer, Matthias and Widmer, Gerhard},
  title       = {{CP-JKU} Submissions for {DCASE}-2016: A Hybrid Approach Using Binaural I-Vectors and Deep Convolutional Neural Networks},
  institution = {DCASE2016 Challenge},
  month       = sep,
  year        = {2016},
  abstract    = {This report describes the 4 submissions for Task 1 (Audio scene classification) of the DCASE-2016 challenge of the CP-JKU team. We propose 4 different approaches for Audio Scene Classification (ASC). First, we propose a novel i-vector extraction scheme for ASC using both left and right audio channels. Second, we propose a Deep Convolutional Neural Network (DCNN) architecture trained on spectrograms of audio excerpts in end-to-end fashion. Third, we use a calibration transformation to improve the performance of our binaural i-vector system. Finally, we propose a late-fusion of our binaural i-vector and the DCNN. We report the performance of our proposed methods on the provided cross-validation setup for the DCASE-2016 challenge. Using the late-fusion approach, we improve the performance of the baseline by 17\% in accuracy.},
}