@inproceedings{fe70b36178ba4434a8f854d0c6d5d100,
title = "Towards Light Weight Object Detection System",
abstract = "Transformers are a popular choice for classification tasks and as backbones for object detection tasks. However, their high latency brings challenges in their adaptation to lightweight classification and object detection systems. We present an approximation of the self-attention layers used in the transformer architecture. This approximation significantly reduces the latency of the classification system while incurring minimal loss in accuracy. We also present a method that uses a transformer encoder layer for multi-resolution feature fusion for object detection. This feature fusion improves the accuracy of the state-of-the-art lightweight object detection system without significantly increasing the number of parameters. These modules can be easily integrated into existing CNN and Transformer architecture to reduce latency and increase the accuracy of the system. Finally, we provide an abstraction for the transformer architecture called Generalized Transformer (gFormer) that can guide the design of novel transformer-like architectures.",
keywords = "object detection, self-attention, Vision transformer",
author = "Dharma, {K. C.} and Dayana, {Venkata Ravi Kiran} and Wu, {Meng Lin} and Cherukuri, {Venkateswara Rao} and Hau Hwang and Morrison, {Clayton T.}",
note = "Publisher Copyright: {\textcopyright} 2024 SPIE.; 2024 International Workshop on Advanced Imaging Technology, IWAIT 2024 ; Conference date: 07-01-2024 Through 08-01-2024",
year = "2024",
doi = "10.1117/12.3017850",
language = "English (US)",
series = "Proceedings of SPIE - The International Society for Optical Engineering",
publisher = "SPIE",
editor = "Masayuki Nakajima and Lau, {Phooi Yee} and Jae-Gon Kim and Hiroyuki Kubo and Chuan-Yu Chang and Qian Kemao",
booktitle = "International Workshop on Advanced Imaging Technology, IWAIT 2024",
}