% Conference paper in the proceedings of the 2022 IEEE International Conference
% on Big Data. The citation key is an opaque exporter-generated id and is kept
% as-is so existing citations keep resolving.
% Notes on field choices:
%   - No series field: the exported value was identical to booktitle and would
%     be printed twice by styles that show both.
%   - No address field: the export only carried the publisher's country
%     (United States), not a city; add the city here if it is confirmed.
%   - eventdate (ISO 8601 range) and copyright hold what the exporter had put
%     in the note field; classic BibTeX styles ignore both, so nothing extra
%     is printed in the reference. The event name itself is already part of
%     booktitle.
@inproceedings{1025a4cdcaef4dbd9af5a598e1e65d7c,
  author    = {Firdausanti, Neni Alya and Fatyanosa, Tirana Noor and Data, Mahendra and Mendonca, Israel and Aritsugi, Masayoshi},
  title     = {Two-Stage Sampling: A Framework for Imbalanced Classification With Overlapped Classes},
  booktitle = {Proceedings - 2022 {IEEE} International Conference on Big Data, Big Data 2022},
  editor    = {Tsumoto, Shusaku and Ohsawa, Yukio and Chen, Lei and {Van den Poel}, Dirk and Hu, Xiaohua and Motomura, Yoichi and Takagi, Takuya and Wu, Lingfei and Xie, Ying and Abe, Akihiro and Raghavan, Vijay},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2022},
  pages     = {271--280},
  doi       = {10.1109/BigData55660.2022.10020788},
  eventdate = {2022-12-17/2022-12-20},
  keywords  = {imbalance, machine learning, overlap, synthetic oversampling, undersampling},
  language  = {English},
  copyright = {{\textcopyright} 2022 IEEE},
  abstract  = {Class imbalance and overlapping instances problems have long been recognized as one of the major causes of the performance deterioration of the classification model. Moreover, the majority class may have an irrelevant and noisy instance that shifts the decision boundary of the classification far away from the ideal one. We propose a framework for balancing the class distribution and mitigating the class overlap problem in a dataset. The key feature of our framework is its ability to detect the overlapping instances between classes and then remove the problematic instances from the majority class. Thus, it will have more precise information for the oversampling method to generate the synthetic minority instances. We evaluated the proposed framework using the Lending club and ten other datasets from the KEEL repository. We demonstrate the implementations of our framework using Tomek and Edited Nearest Neighbor for removing the overlapping instances from the majority class and SWIM-MD for generating the synthetic minority instances. Also, we used eight well-known classifiers to show that our proposed framework can improve the performance of various classifiers. Lastly, we present a detailed analysis of the experimental result that shows the superiority of our proposed framework. Our proposed framework outperformed the state-of-the-art methods in terms of geometry mean classification performance metric.},
}