% Conference paper: literature review of Transformer-based image captioning (ICITEE 2022).
% Event title and event date are stored in eventtitle / eventdate (read by biblatex,
% ignored by classic BibTeX styles); note carries only the publisher copyright line.
% NOTE(review): address holds a country, not the publisher's city -- confirm the city
% against the publisher record before relying on it in styles that print the address.
@inproceedings{4bbb57850c85465dbecaca2e67aa42f7,
  author     = {Tsaniya, Hilya and Fatichah, Chastine and Suciati, Nanik},
  title      = {Transformer Approaches in Image Captioning: A Literature Review},
  booktitle  = {{ICITEE} 2022 - Proceedings of the 14th International Conference on Information Technology and Electrical Engineering},
  eventtitle = {14th International Conference on Information Technology and Electrical Engineering, {ICITEE} 2022},
  eventdate  = {2022-10-18/2022-10-19},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  address    = {United States},
  year       = {2022},
  pages      = {280--285},
  doi        = {10.1109/ICITEE56407.2022.9954086},
  language   = {English},
  keywords   = {Attention Mechanism, Automatic Captioning, Image Captioning, Literature Review, Transformer},
  abstract   = {Image captioning is one of the challenging tasks that cross the computer vision and the Natural Language Processing (NLP) domain. Its main task is to interpret images in a descriptive text similar to humans. Image captioning is useful to help humans understand visual content. The main challenge is to get a coherent caption that could be understood by a human. With the trend of Transformer in computer vision that has proven successful to reach new results in state-of-the-art, the interest to implement it in Image Captioning is also increased. This paper presents a literature review of image captioning using transformer methods. The literature is reviewed from reputable journals and conferences. Our review focus on transformer approaches in order to improve the model performance in image captioning. We also explore the existing public datasets that are used in image captioning. The limitations and future research on image captioning are also discussed with additional potential subsidiary research.},
  note       = {Publisher Copyright: {\textcopyright} 2022 IEEE.},
}