% Conference paper (TEHI 2022 proceedings, Lecture Notes in Networks and Systems).
% Names are stored in "Last, First" form so styles can abbreviate given names.
% Acronyms in title/booktitle are brace-protected against sentence-casing styles.
% NOTE(review): publisher/address follow the copyright statement kept in the note
% field (Springer Nature Singapore Pte Ltd.) and the 978-981 ISBN prefix; the
% exported values were a German publisher entity with address "Germany" (a
% country, not a city). Confirm against the publisher's landing page for the DOI.
@inproceedings{f5ead27240cc4900bf5991f2454ce3a1,
  author    = {Shen, Yuan and Kaur, Jaspreet and Mahmud, Mufti and Brown, David J. and He, Jun and Rahman, Muhammad Arifur and Baldwin, David R. and O'Dowd, Emma and Hubbard, Richard B.},
  title     = {Logistic Regression Approach to a Joint Classification and Feature Selection in Lung Cancer Screening Using {CPRD} Data},
  booktitle = {Proceedings of Trends in Electronics and Health Informatics - {TEHI} 2022},
  editor    = {Mahmud, Mufti and Mendoza-Barrera, Claudia and Kaiser, M. Shamim and Bandyopadhyay, Anirban and Ray, Kanad and Lugo, Eduardo},
  series    = {Lecture Notes in Networks and Systems},
  pages     = {191--206},
  publisher = {Springer Nature Singapore},
  address   = {Singapore},
  year      = {2023},
  isbn      = {9789819919154},
  doi       = {10.1007/978-981-99-1916-1_15},
  language  = {English},
  keywords  = {CPRD, Cancer screening, Classification, Cost-effectiveness, Early detection, Feature selection, Imbalanced classification, Logistic regression, Lung cancer},
  abstract  = {Lung cancer is one of the most deadly cancers in the world. Its mortality rate is high when the cancer is diagnosed late. Therefore, early detection is a crucial factor for an increase in survival rate, and lung cancer screening is one of the most important intervention tools. However, the screening would be cost-effective only when we can accurately select a sub-population which is at the most risk of lung cancer. It is hypothesised that this selection task can be done cost-effectively when we use clinical data (e.g. demographic, lifestyle and comorbidity variables) rather than low-dose CT. This work used the clinical data extracted from Clinical Practice Research Datalink (CPRD). The goal is to test whether this approach can achieve comparable or even better selection performance when compared to an alternative approach using clinical data from lung cancer screening trials. The latter approach is adopted in [54]. In this paper, we further adapt the logistic regression model for a joint classification and feature selection analysis. The model is implemented in an {\textquoteleft}ensemble learning{\textquoteright} manner to deal with severe {\textquoteleft}class imbalance{\textquoteright} problems. It is observed that the sensitivity and specificity results are slightly better than those reported in [54]. Also, we identified a comorbidity factor COPD and a smoking-related factor smk-status as the two most discriminative features.},
  note      = {Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd.; 2nd International Conference on Trends in Electronics and Health Informatics, TEHI 2022 ; Conference date: 07-12-2022 Through 09-12-2022},
}