% Conference paper on automatic construction of an Indonesian thesaurus.
% - The proceedings title is stored once, in booktitle.
% - "Indonesian" is brace-protected so sentence-casing styles keep its capital.
% - Percent signs in the abstract are escaped so the text stays valid LaTeX
%   if a style prints it.
% - NOTE(review): address holds a country rather than the publisher's city, and
%   month/day differ from the conference dates recorded in note (presumably the
%   online publication date) -- confirm both against the publisher record.
@inproceedings{730f87351355436b929f29499cb6669a,
  author    = {Sholikah, Rizka W. and Arifin, Agus Zainal and Purwitasari, Diana and Fatichah, Chastine},
  title     = {Co-occurrence technique and dictionary based method for {Indonesian} thesaurus construction},
  booktitle = {2017 5th International Conference on Information and Communication Technology, ICoIC7 2017},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2017},
  month     = oct,
  day       = {18},
  doi       = {10.1109/ICoICT.2017.8074649},
  language  = {English},
  keywords  = {Indonesian language, monolingual corpus, query expansion, thesaurus},
  abstract  = {Thesaurus as control vocabulary can be an important tool in Natural Language Processing (NLP). However, constructing a thesaurus manually by experts can be time consuming. Besides that the subjectivity of each expert can affect the structure of the thesaurus. A lot of method has already been implemented to build an automatic thesaurus in languages that categorized as rich language resources. In poor language resources such as Indonesia, the research about this field is still limited. This paper proposed a framework to construct a thesaurus in Indonesian language using monolingual corpus. The method will use Indonesian dictionary and large monolingual corpus from news articles. The candidate related terms will be extracted from every resource, then the two candidate will produce the final result of thesaurus. The evaluation is done by using the thesaurus as QE (Query Expansion) resource in IR (Information Retrieval) system. The experimental results show that using the automatic thesaurus can obtain the precision and recall of the system with 54.00\% and 85.42\%, respectively.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 5th International Conference on Information and Communication Technology, ICoIC7 2017 ; Conference date: 17-05-2017 Through 19-05-2017},
}