2022
Reinke, Annika; Maier-Hein, Lena; Christodoulou, Evangelia; Glocker, Ben; Scholz, Patrick; Isensee, Fabian; Kleesiek, Jens; Kozubek, Michal; Reyes, Mauricio; Riegler, Michael Alexander; Wiesenfarth, Manuel; Baumgartner, Michael; Eisenmann, Matthias; Heckmann-Nötzel, Doreen; Kavur, Ali Emre; Rädsch, Tim; Tizabi, Minu D.; Acion, Laura; Antonelli, Michela; Arbel, Tal; Bakas, Spyridon; Bankhead, Peter; Benis, Arriel; Cardoso, M. Jorge; Cheplygina, Veronika; Cimini, Beth A; Collins, Gary S.; Farahani, Keyvan; van Ginneken, Bram; Hamprecht, Fred A; Hashimoto, Daniel A.; Hoffman, Michael M.; Huisman, Merel; Jannin, Pierre; Kahn, Charles; Karargyris, Alexandros; Karthikesalingam, Alan; Kenngott, Hannes; Kopp-Schneider, Annette; Kreshuk, Anna; Kurc, Tahsin; Landman, Bennett A.; Litjens, Geert; Madani, Amin; Maier-Hein, Klaus; Martel, Anne; Mattson, Peter; Meijering, Erik; Menze, Bjoern; Moher, David; Moons, Karel G. M.; Müller, Henning; Nichyporuk, Brennan; Nickel, Felix; Petersen, Jens; Rajpoot, Nasir; Rieke, Nicola; Saez-Rodriguez, Julio; Sánchez, Clara I.; Shetty, Shravya; van Smeden, Maarten; Sudre, Carole H.; Summers, Ronald M.; Taha, Abdel A.; Tsaftaris, Sotirios A.; Van Calster, Ben; Varoquaux, Gael; Jaeger, Paul F
Metrics Reloaded - A new recommendation framework for biomedical image analysis validation Journal Article
In: Medical Imaging with Deep Learning, 2022.
Abstract | BibTeX | Tags: Classification, Instance Segmentation, Medical Imaging, Metrics, Object Detection, Validation
@comment{Reinke2022: contribution to Medical Imaging with Deep Learning (MIDL), 2022.
  Typed as a proceedings paper with the venue in booktitle, like the Reinke2021 entry.
  Author names use the "Last, First" form so that particle surnames
  (van Ginneken, van Smeden, Van Calster) are split correctly.}
@inproceedings{Reinke2022,
  author     = {Reinke, Annika and Maier-Hein, Lena and Christodoulou, Evangelia and
                Glocker, Ben and Scholz, Patrick and Isensee, Fabian and
                Kleesiek, Jens and Kozubek, Michal and Reyes, Mauricio and
                Riegler, Michael Alexander and Wiesenfarth, Manuel and
                Baumgartner, Michael and Eisenmann, Matthias and
                Heckmann-Nötzel, Doreen and Kavur, Ali Emre and Rädsch, Tim and
                Tizabi, Minu D. and Acion, Laura and Antonelli, Michela and
                Arbel, Tal and Bakas, Spyridon and Bankhead, Peter and
                Benis, Arriel and Cardoso, M. Jorge and Cheplygina, Veronika and
                Cimini, Beth A. and Collins, Gary S. and Farahani, Keyvan and
                van Ginneken, Bram and Hamprecht, Fred A. and
                Hashimoto, Daniel A. and Hoffman, Michael M. and Huisman, Merel and
                Jannin, Pierre and Kahn, Charles and Karargyris, Alexandros and
                Karthikesalingam, Alan and Kenngott, Hannes and
                Kopp-Schneider, Annette and Kreshuk, Anna and Kurc, Tahsin and
                Landman, Bennett A. and Litjens, Geert and Madani, Amin and
                Maier-Hein, Klaus and Martel, Anne and Mattson, Peter and
                Meijering, Erik and Menze, Bjoern and Moher, David and
                Moons, Karel G. M. and Müller, Henning and Nichyporuk, Brennan and
                Nickel, Felix and Petersen, Jens and Rajpoot, Nasir and
                Rieke, Nicola and Saez-Rodriguez, Julio and Sánchez, Clara I. and
                Shetty, Shravya and van Smeden, Maarten and Sudre, Carole H. and
                Summers, Ronald M. and Taha, Abdel A. and Tsaftaris, Sotirios A. and
                Van Calster, Ben and Varoquaux, Gael and Jaeger, Paul F.},
  title      = {Metrics Reloaded - A new recommendation framework for biomedical image analysis validation},
  booktitle  = {Medical Imaging with Deep Learning},
  year       = {2022},
  date       = {2022-01-01},
  keywords   = {Classification, Instance Segmentation, Medical Imaging, Metrics, Object Detection, Validation},
  abstract   = {Meaningful performance assessment of biomedical image analysis algorithms depends on objective and appropriate performance metrics. There are major shortcomings in the current state of the art. Yet, so far limited attention has been paid to practical pitfalls associated when using particular metrics for image analysis tasks. Therefore, a number of international initiatives have collaborated to offer researchers with guidance and tools for selecting performance metrics in a problem-aware manner. In our proposed framework, the characteristics of the given biomedical problem are first captured in a problem fingerprint, which identifies properties related to domain interests, the target structure(s), the input datasets, and algorithm output. A problem category-specific mapping is applied in the second step to match fingerprints to metrics that reflect domain requirements. Based on input from experts from more than 60 institutions worldwide, we believe our metric recommendation framework to be useful to the MIDL community and to enhance the quality of biomedical image analysis algorithm validation.},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2021
Reinke, Annika; Eisenmann, Matthias; Tizabi, Minu Dietlinde; Sudre, Carole H.; Rädsch, Tim; Antonelli, Michela; Arbel, Tal; Bakas, Spyridon; Cardoso, M. Jorge; Cheplygina, Veronika; Farahani, Keyvan; Glocker, Ben; Heckmann-Nötzel, Doreen; Isensee, Fabian; Jannin, Pierre; Kahn, Charles; Kleesiek, Jens; Kurc, Tahsin; Kozubek, Michal; Landman, Bennett A.; Litjens, Geert; Maier-Hein, Klaus; Martel, Anne L; Menze, Bjoern; Müller, Henning; Petersen, Jens; Reyes, Mauricio; Rieke, Nicola; Stieltjes, Bram; Summers, Ronald M.; Tsaftaris, Sotirios A.; van Ginneken, Bram; Kopp-Schneider, Annette; Jäger, Paul; Maier-Hein, Lena
Common limitations of performance metrics in biomedical image analysis Proceedings Article
In: MIDL 2021, 2021.
Abstract | Links | BibTeX | Tags: Challenges, Metrics, segmentation, Validation
@comment{Reinke2021: MIDL 2021 proceedings paper.
  The arXiv identifier in the eprint fields is taken from the url field.
  Author names use the "Last, First" form so that the particle surname
  van Ginneken is split correctly.}
@inproceedings{Reinke2021,
  author     = {Reinke, Annika and Eisenmann, Matthias and Tizabi, Minu Dietlinde and
                Sudre, Carole H. and Rädsch, Tim and Antonelli, Michela and
                Arbel, Tal and Bakas, Spyridon and Cardoso, M. Jorge and
                Cheplygina, Veronika and Farahani, Keyvan and Glocker, Ben and
                Heckmann-Nötzel, Doreen and Isensee, Fabian and Jannin, Pierre and
                Kahn, Charles and Kleesiek, Jens and Kurc, Tahsin and
                Kozubek, Michal and Landman, Bennett A. and Litjens, Geert and
                Maier-Hein, Klaus and Martel, Anne L. and Menze, Bjoern and
                Müller, Henning and Petersen, Jens and Reyes, Mauricio and
                Rieke, Nicola and Stieltjes, Bram and Summers, Ronald M. and
                Tsaftaris, Sotirios A. and van Ginneken, Bram and
                Kopp-Schneider, Annette and Jäger, Paul and Maier-Hein, Lena},
  title      = {Common limitations of performance metrics in biomedical image analysis},
  booktitle  = {MIDL 2021},
  year       = {2021},
  date       = {2021-04-01},
  eprint     = {2104.05642},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2104.05642},
  urldate    = {2021-04-01},
  keywords   = {Challenges, Metrics, segmentation, Validation},
  abstract   = {While the importance of automatic biomedical image analysis is increasing at an enormous pace, recent meta-research revealed major flaws with respect to algorithm validation. Performance metrics are key for objective, transparent and comparative performance assessment, but little attention has been given to their pitfalls. Under the umbrella of the Helmholtz Imaging Platform (HIP), three international initiatives-the MICCAI Society's challenge working group, the Biomedical Image Analysis Challenges (BIAS) initiative, as well as the benchmarking working group of the MONAI framework-have now joined forces with the mission to generate best practice recommendations with respect to metrics in medical image analysis. Consensus building is achieved via a Delphi process, a popular tool for integrating opinions in large international consortia. The current document serves as a teaser for the results presentation and focuses on the pitfalls of the most commonly used metric in biomedical image analysis, the Dice Similarity Coefficient (DSC), in the categories of (1) mathematical properties/edge cases, (2) task/metric fit and (3) metric aggregation. Being compiled by a large group of experts from more than 30 institutes worldwide, we believe that our framework could be of general interest to the MIDL community and will improve the quality of biomedical image analysis algorithm validation.},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}