% Conference paper: VeCAF, in the proceedings of the 32nd ACM International
% Conference on Multimedia (MM 2024). The citation key is kept as exported so
% existing \cite commands keep working. No `series` field is given: the exported
% value repeated `booktitle` verbatim, which makes standard styles print the
% proceedings title twice.
@inproceedings{bce1da55bba44828ae1a7ac29caecddf,
  author    = {Zhang, Rongyu and Cai, Zefan and Yang, Huanrui and Liu, Zidong and Gudovskiy, Denis and Okuno, Tomoyuki and Nakata, Yohei and Keutzer, Kurt and Chang, Baobao and Du, Yuan and Du, Li and Zhang, Shanghang},
  title     = {{VeCAF}: Vision-language Collaborative Active Finetuning with Training Objective Awareness},
  booktitle = {{MM} 2024 - Proceedings of the 32nd {ACM} International Conference on Multimedia},
  publisher = {Association for Computing Machinery, Inc},
  pages     = {5451--5459},
  year      = {2024},
  month     = oct,
  day       = {28},
  doi       = {10.1145/3664647.3681135},
  language  = {English (US)},
  keywords  = {active learning, fine-tuning, vision-language models},
  note      = {Publisher Copyright: {\textcopyright} 2024 ACM.; 32nd ACM International Conference on Multimedia, MM 2024 ; Conference date: 28-10-2024 Through 01-11-2024},
  abstract  = {Finetuning a pretrained vision model (PVM) is a common technique for learning downstream vision tasks. The conventional finetuning process with the randomly sampled data points results in diminished training efficiency. To address this drawback, we propose a novel approach, Vision-language Collaborative Active Finetuning (VeCAF). VeCAF optimizes a parametric data selection model by incorporating the training objective of the model being tuned. Effectively, this guides the PVM towards the performance goal with improved data and computational efficiency. With the ever-growing feasibility of acquiring labels and natural language annotations of image data through web-scale crawling, we exploit the inherent semantic richness of the text embedding space and utilize text embeddings of image annotations to augment PVM image features for better data selection and finetuning. Furthermore, the flexibility of text-domain augmentation gives VeCAF the unique ability to handle out-of-distribution scenarios without external augmented data. Extensive experiments show the leading performance and high efficiency of VeCAF that is superior to baselines in both in-distribution and out-of-distribution image classification tasks. On ImageNet, VeCAF needs up to 3.3$\times$ less training batches to reach the target performance compared to full fine-tuning and achieves an accuracy improvement of 2.8\% over active SOTA fine-tuning methods with the same number of batches. Our code is now available at https://github.com/RoyZry98/VeCAF-Pytorch.},
}