2021
Reinke, Annika; Eisenmann, Matthias; Tizabi, Minu Dietlinde; Sudre, Carole H.; Rädsch, Tim; Antonelli, Michela; Arbel, Tal; Bakas, Spyridon; Cardoso, M. Jorge; Cheplygina, Veronika; Farahani, Keyvan; Glocker, Ben; Heckmann-Nötzel, Doreen; Isensee, Fabian; Jannin, Pierre; Kahn, Charles; Kleesiek, Jens; Kurc, Tahsin; Kozubek, Michal; Landman, Bennett A.; Litjens, Geert; Maier-Hein, Klaus; Martel, Anne L.; Menze, Bjoern; Müller, Henning; Petersen, Jens; Reyes, Mauricio; Rieke, Nicola; Stieltjes, Bram; Summers, Ronald M.; Tsaftaris, Sotirios A.; van Ginneken, Bram; Kopp-Schneider, Annette; Jäger, Paul; Maier-Hein, Lena
Common limitations of performance metrics in biomedical image analysis Proceedings Article
In: MIDL 2021, 2021.
Abstract | Links | BibTeX | Tags: Challenges, Metrics, segmentation, Validation
@comment{Conference paper (MIDL 2021) on pitfalls of the Dice Similarity Coefficient; the url and eprint fields both point at arXiv 2104.05642. Names use the Last, First form so particles and hyphenated surnames parse correctly.}
@inproceedings{Reinke2021,
  author     = {Reinke, Annika and
                Eisenmann, Matthias and
                Tizabi, Minu Dietlinde and
                Sudre, Carole H. and
                Rädsch, Tim and
                Antonelli, Michela and
                Arbel, Tal and
                Bakas, Spyridon and
                Cardoso, M. Jorge and
                Cheplygina, Veronika and
                Farahani, Keyvan and
                Glocker, Ben and
                Heckmann-Nötzel, Doreen and
                Isensee, Fabian and
                Jannin, Pierre and
                Kahn, Charles and
                Kleesiek, Jens and
                Kurc, Tahsin and
                Kozubek, Michal and
                Landman, Bennett A. and
                Litjens, Geert and
                Maier-Hein, Klaus and
                Martel, Anne L. and
                Menze, Bjoern and
                Müller, Henning and
                Petersen, Jens and
                Reyes, Mauricio and
                Rieke, Nicola and
                Stieltjes, Bram and
                Summers, Ronald M. and
                Tsaftaris, Sotirios A. and
                van Ginneken, Bram and
                Kopp-Schneider, Annette and
                Jäger, Paul and
                Maier-Hein, Lena},
  title      = {Common limitations of performance metrics in biomedical image analysis},
  booktitle  = {Medical Imaging with Deep Learning ({MIDL} 2021)},
  year       = {2021},
  date       = {2021-04-01},
  eprint     = {2104.05642},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2104.05642},
  urldate    = {2021-04-01},
  abstract   = {While the importance of automatic biomedical image analysis is increasing at an enormous pace, recent meta-research revealed major flaws with respect to algorithm validation. Performance metrics are key for objective, transparent and comparative performance assessment, but little attention has been given to their pitfalls. Under the umbrella of the Helmholtz Imaging Platform (HIP), three international initiatives—the MICCAI Society's challenge working group, the Biomedical Image Analysis Challenges (BIAS) initiative, as well as the benchmarking working group of the MONAI framework—have now joined forces with the mission to generate best practice recommendations with respect to metrics in medical image analysis. Consensus building is achieved via a Delphi process, a popular tool for integrating opinions in large international consortia. The current document serves as a teaser for the results presentation and focuses on the pitfalls of the most commonly used metric in biomedical image analysis, the Dice Similarity Coefficient (DSC), in the categories of (1) mathematical properties/edge cases, (2) task/metric fit and (3) metric aggregation. Being compiled by a large group of experts from more than 30 institutes worldwide, we believe that our framework could be of general interest to the MIDL community and will improve the quality of biomedical image analysis algorithm validation.},
  keywords   = {Challenges, Metrics, segmentation, Validation},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}