% Conference paper (ACL 2025, long-papers track) introducing the Asclepius
% benchmark. Cleaned up from a repository export:
%   - names are in unambiguous "Last, First" form, without the escaped
%     literal braces the exporter put around multi-part given names;
%   - booktitle carries the full proceedings title (previously split across
%     booktitle, series and note);
%   - event and copyright metadata moved out of the printed note into
%     eventtitle, eventdate and copyright, which classic styles ignore;
%   - NOTE(review): address is the exporter's country value, not a publisher
%     city -- replace with the city once confirmed.
@inproceedings{e4cfc67388f64d8dbb7d2e3250635776,
  author     = {Liu, Jie and Wang, Wenxuan and Su, Yihang and Huan, Jingyuan and Zhang, Yudi and Li, Cheng Yi and Chen, Wenting and Xing, Xiaohan and Chang, Kao Jung and Shen, Linlin and Lyu, Michael R.},
  title      = {{Asclepius}: A Spectrum Evaluation Benchmark for Medical Multi-Modal Large Language Models},
  booktitle  = {Proceedings of the 63rd Annual Meeting of the {Association for Computational Linguistics}: Long Papers},
  editor     = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  pages      = {24181--24201},
  publisher  = {Association for Computational Linguistics (ACL)},
  address    = {United States},
  year       = {2025},
  language   = {English},
  eventtitle = {63rd Annual Meeting of the Association for Computational Linguistics, {ACL} 2025},
  eventdate  = {2025-07-27/2025-08-01},
  copyright  = {{\textcopyright} 2025 Association for Computational Linguistics},
  abstract   = {The significant breakthroughs of Medical Multi-Modal Large Language Models (Med-MLLMs) renovate modern healthcare with robust information synthesis and medical decision support. However, these models are often evaluated on benchmarks that are unsuitable for the Med-MLLMs due to the complexity of real-world diagnostics across diverse specialties. To address this gap, we introduce Asclepius, a novel Med-MLLM benchmark that comprehensively assesses Med-MLLMs in terms of: distinct medical specialties (cardiovascular, gastroenterology, etc.) and different diagnostic capacities (perception, disease analysis, etc.). Grounded in 3 proposed core principles, Asclepius ensures a comprehensive evaluation by encompassing 15 medical specialties, stratifying into 3 main categories and 8 sub-categories of clinical tasks, and exempting overlap with existing VQA dataset. We further provide an in-depth analysis of 6 Med-MLLMs and compare them with 3 human specialists, providing insights into their competencies and limitations in various medical contexts. Our work not only advances the understanding of Med-MLLMs' capabilities but also sets a precedent for future evaluations and the safe deployment of these models in clinical environments.},
}