@comment{
  Conference paper (ICAICTA 2023, IEEE).
  - booktitle holds the complete proceedings title; no separate series
    field is used because the proceedings are not part of a named series.
  - Author names use the unambiguous "Last, First" form.
  - Braces in title/booktitle protect words whose capitalisation must
    survive sentence-casing styles.
  - NOTE(review): address currently holds a country rather than the
    publisher's city; confirm the city and replace if known.
  - NOTE(review): the conference dates in the note field are kept exactly
    as exported (presumably DD-MM-YYYY; confirm before reformatting).
}
@inproceedings{e45bf46bbaeb46fb9c5a760104228f49,
  author    = {Alfian, Muhammad and Yuhana, Umi Laili and Siahaan, Daniel},
  title     = {{Indonesian} Part-of-Speech Tagger: A Comparative Study},
  booktitle = {2023 10th International Conference on Advanced Informatics: Concept, Theory and Application, {ICAICTA} 2023},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  doi       = {10.1109/ICAICTA59291.2023.10390353},
  language  = {English},
  keywords  = {Indonesia, POS Tagging, corpus, tagset},
  abstract  = {POS Tagging is one of the essential tasks in Natural Language Processing (NLP). Researchers are competing to find the best model for Indonesian POS Tagging cases. However, they still use one corpus as a reference for model development. This study investigates the comparison of three different corpora and three state-of-the-art models to explore the use of appropriate corpora and models for Indonesian POS Tagging. We divided the corpus into training, validation, and testing datasets. We use the training and validation datasets to tune the model. We use the test dataset to evaluate the performance of the model. The experimental results show that Yunshan and Dinakaramani corpora have outstanding performance in POS Tagging. While the Feedforward and BiLSTM models have equally superior performance, outperforming the other models with the highest value of 96.10\%. This experiment proves that both models are stable when applied to different corpus. Further investigation is needed to improve the performance of the models by considering variations in word embedding usage, architecture, and evaluation methods.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 10th International Conference on Advanced Informatics: Concept, Theory and Application, ICAICTA 2023 ; Conference date: 07-10-2023 Through 09-10-2023},
}