% Conference paper entry (ACIIW 2022 workshop proceedings).
% - Author names are all given in "Last, First" form; "Zanjabila" is a single-part name.
% - The former "series" field was dropped because it repeated "booktitle" verbatim,
%   which makes styles that print both fields show the proceedings title twice.
% - NOTE(review): "address" holds a country rather than a publisher city -- confirm.
@inproceedings{6c74bd9eeca049d78c8889296dad42aa,
title = "Jointly Predicting Emotion, Age, and Country Using Pre-Trained Acoustic Embedding",
abstract = "In this paper, we demonstrated the benefit of using a pre-trained model to extract acoustic embedding to jointly predict (multitask learning) three tasks: emotion, age, and native country. The pre-trained model was trained with wav2vec 2.0 large and robust model on the speech emotion corpus. The emotion and age tasks were regression problems, while country prediction was a classification task. A single harmonic mean from three metrics was used to evaluate the performance of multitask learning. The classifier was a linear network with two independent layers and shared layers connected to the output layers. This study explores multitask learning on different acoustic features (including the acoustic embedding extracted from a model trained on an affective speech dataset), seed numbers, batch sizes, and waveform normalizations for predicting paralinguistic information from speech.",
keywords = "acoustic embedding, affective computing, age prediction, country prediction, multitask learning, speech emotion recognition",
author = "Atmaja, {Bagus Tris} and Zanjabila and Sasou, Akira",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos, ACIIW 2022 ; Conference date: 17-10-2022 Through 21-10-2022",
year = "2022",
doi = "10.1109/ACIIW57231.2022.10085991",
language = "English",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos, {ACIIW} 2022",
address = "United States",
}