@inproceedings{8b2a18a87c5b4ba2bb2fa5fe33f5fe2f,
  title     = {Understanding Feature Importance of Prediction Models Based on Lung Cancer Primary Care Data},
  abstract  = {Machine learning (ML) models in healthcare are increasing but the lack of interpretability of these models results in them not being suitable for use in clinical practice. In the medical field, it is vital to clarify to clinicians and patients the rationale behind a model's high probability prediction for a specific disease in an individual patient. This transparency fosters trust, facilitates informed decision-making, and empowers both clinicians and patients to understand the underlying factors driving the model's output. This paper aims to incorporate explainability to ML models such as Random Forest (RF), eXtreme Gradient Boosting (XGBoost) and Multilayer Perceptron (MLP) for using with Clinical Practice Research Datalink (CPRD) data and interpret them in terms of feature importance to identify the top most features when distinguishing between lung cancer and non-lung cancer cases. The SHapley Additive exPlanations (SHAP) method has been used in this work to interpret the models. We use SHAP to gain insights into explaining individual predictions as well as interpreting them globally. The feature importance from SHAP is compared with the default feature importance of the models to identify any discrepancies between the results. Based on experimental findings, it has been found that the default feature importance from the tree-based models and SHAP is consistent with features 'age' and 'smoking status' which serve as the top features for predicting lung cancer among patients. Additionally, this work pinpoints that feature importance for a single patient may vary leading to a varied prediction depending on the employed model. Finally, the work concludes that individual-level explanation of feature importance is crucial in mission-critical applications like healthcare to better understand personal health and lifestyle factors in the early prediction of diseases that may lead to terminal illness.},
  keywords  = {CPRD, SHAP, feature importance, interpretability, lung cancer, machine learning},
  author    = {Rai, Teena and Shen, Yuan and He, Jun and Mahmud, Mufti and Brown, David J. and Kaur, Jaspreet and O'Dowd, Emma and Baldwin, David R. and Hubbard, Richard},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 International Joint Conference on Neural Networks, IJCNN 2024 ; Conference date: 30-06-2024 Through 05-07-2024},
  year      = {2024},
  doi       = {10.1109/IJCNN60899.2024.10650819},
  language  = {English},
  series    = {Proceedings of the International Joint Conference on Neural Networks},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  booktitle = {2024 International Joint Conference on Neural Networks, {IJCNN} 2024 - Proceedings},
  address   = {United States},
}