TL;DR: We evaluate object-centric representations with VLMs for real visual reasoning and introduce a unified metric that jointly measures localization and representation usefulness.
@comment{singh2026evaluating is an arXiv preprint, stored as misc with
  archive metadata in archiveprefix and primaryclass instead of booktitle.
  NOTE(review): the arXiv identifier is not recorded in this file; add an
  eprint field once it has been confirmed.
  NOTE(review): the citation key says 2026 but the year field says 2024;
  verify the year against the arXiv listing. The key is kept unchanged so
  existing citations continue to resolve.}
@misc{singh2026evaluating,
  author        = {Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan},
  title         = {Evaluating Object-Centric Models beyond Object Discovery},
  howpublished  = {arXiv preprint},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  year          = {2024},
}