% Conference paper from the APSIPA ASC 2017 proceedings describing the
% development of a Bahasa Indonesia speech corpus.
% - title: "Bahasa Indonesia" is brace-protected so that sentence-casing
%   styles keep its capital letters.
% - month: taken from the conference date recorded in the note field
%   (12-15 December 2017). NOTE(review): the exported record carried
%   month = jul with day = 2, which looks like a mid-year placeholder rather
%   than a real date; confirm against the publisher record.
% - The former series field repeated booktitle verbatim and was dropped so
%   the proceedings name is not printed twice.
% - NOTE(review): address holds a country, not a publisher city; left as
%   exported because the entry gives no city.
@inproceedings{72fbd303ce374b6eac13d0ab4e461684,
  author    = {Cahyaningtyas, Elok and Arifianto, Dhany},
  title     = {Development of under-resourced {Bahasa Indonesia} speech corpus},
  booktitle = {Proceedings - 9th Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, APSIPA ASC 2017},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {1097--1101},
  year      = {2017},
  month     = dec,
  doi       = {10.1109/APSIPA.2017.8282191},
  language  = {English},
  keywords  = {Bahasa Indonesia, labeling, segmentation, speech corpus, under-resourced language},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 9th Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, APSIPA ASC 2017 ; Conference date: 12-12-2017 Through 15-12-2017},
  abstract  = {Although Bahasa Indonesia is used by about 263 million people in the world, it is classified into an under-resourced language. In this paper we outlined the development of casual sentences of Bahasa Indonesia speech corpus in which contains a speech database and its transcription. Firstly, we selected casual Bahasa Indonesia sentences from movie and drama transcript and formed 1029 declarative sentences and 500 question sentences, respectively. We hired six professional radio news readers to utter the sentences to avoid local dialect in sound-proof booth. Then segmentation and labeling was performed to make create transcription including the time label of each individual phoneme. To ensure the quality of the database, we manually inspected the waveform and the frequency of the individual sentences using spectrogram. The results suggest that the speech corpus may be used for speech processing project like speech recognition and speech synthesis. In the on-going research, we are developing high quality of speech synthesis, namely speaker adaptation and speaker averaging.},
}