@inproceedings{lorenzo2017misperceptions,
  author    = {Lorenzo-Trueba, Jaime and Valentini-Botinhao, Cassia and Henter, Gustav Eje and Yamagishi, Junichi},
  title     = {Misperceptions of the Emotional Content of Natural and Vocoded Speech in a Car},
  booktitle = {Proc. {Interspeech}},
  volume    = {18},
  pages     = {606--610},
  address   = {Stockholm, Sweden},
  month     = aug,
  year      = {2017},
  publisher = {ISCA},
  doi       = {10.21437/Interspeech.2017-532},
  keywords  = {emotional perception, speech in noise, emotion recognition, car noise},
  abstract  = {This paper analyzes a) how often listeners interpret the emotional content of an utterance incorrectly when listening to vocoded or natural speech in adverse conditions; b) which noise conditions cause the most misperceptions; and c) which group of listeners misinterpret emotions the most. The long-term goal is to construct new emotional speech synthesizers that adapt to the environment and to the listener. We performed a large-scale listening test where over 400 listeners between the ages of 21 and 72 assessed natural and vocoded acted emotional speech stimuli. The stimuli had been artificially degraded using a room impulse response recorded in a car and various in-car noise types recorded in a real car. Experimental results show that the recognition rates for emotions and perceived emotional strength degrade as signal-to-noise ratio decreases. Interestingly, misperceptions seem to be more pronounced for negative and low-arousal emotions such as calmness or anger, while positive emotions such as happiness appear to be more robust to noise. An ANOVA analysis of listener meta-data further revealed that gender and age also influenced results, with elderly male listeners most likely to incorrectly identify emotions.},
}