% Conference paper from the CENIM 2022 proceedings (IEEE); see the doi field.
% Cleanup notes:
%   - Acronyms and proper nouns in title/booktitle are brace-protected so
%     sentence-casing styles do not lowercase them.
%   - The exported series field was dropped: it was an exact copy of
%     booktitle and made some styles print the proceedings title twice.
%   - Author names use the unambiguous "Last, First" form throughout.
%   - NOTE(review): address holds a country, not a publisher city, as
%     exported; confirm the intended value before relying on it.
@inproceedings{93ae57afa674410c936880475b7c76e8,
  author    = {Santosa, Agung and Jarin, Asril and Yuniarno, Eko Mulyanto and Riza, Hammam and Purnomo, Mauridhi Hery},
  title     = {{OOV} Handling Using Partial Lemma-Based Language Model in {LF-MMI} Based {ASR} for {Bahasa Indonesia}},
  booktitle = {Proceeding of the International Conference on Computer Engineering, Network and Intelligent Multimedia, {CENIM} 2022},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {167--171},
  year      = {2022},
  doi       = {10.1109/CENIM56801.2022.10037479},
  language  = {English},
  keywords  = {ASR, Bahasa Indonesia, LF-MMI, Language Model, OOV},
  abstract  = {One of the common problems in ASR is the out-of-vocabulary word in an utterance that can degrade the performance of the system. Bahasa Indonesia, as an agglutinative language, uses affixation to generate words from a set of affixes and root words. We propose the use of a partial lemma-based language model (LM) and lexicon that can handle words created from affixation. The partial lemma-based LM and lexicon are created from the original ones using morphology analyzer output as a reference. The experiment shows that using the LM in ASR with LF-MMI cost function gives a better WER when the heuristic to insert inter-word short pause is modified to also consider the affixes.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 International Conference on Computer Engineering, Network and Intelligent Multimedia, CENIM 2022 ; Conference date: 22-11-2022 Through 23-11-2022},
}