% Conference paper from the ICME 2023 proceedings presenting ADATS (end-to-end text spotting).
% Title acronyms are brace-protected so sentence-casing styles keep their capitals;
% author names use the unambiguous "Last, First" form.
% NOTE(review): address holds a country, not the publisher's city (kept as exported) -- confirm.
@inproceedings{08e08f1d53be40948a65b8c450e2ad90,
  author    = {Huang, Zepeng and Wan, Qi and Chen, Junliang and Zhao, Xiaodong and Ye, Kai and Shen, Linlin},
  title     = {{ADATS}: Adaptive {RoI-Align} based Transformer for End-to-End Text Spotting},
  booktitle = {Proceedings - 2023 IEEE International Conference on Multimedia and Expo, ICME 2023},
  series    = {Proceedings - IEEE International Conference on Multimedia and Expo},
  pages     = {1403--1408},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2023},
  doi       = {10.1109/ICME55011.2023.00243},
  language  = {English},
  keywords  = {End-to-end text spotting, segmentation, text detection, text recognition},
  abstract  = {Scene text spotting has attracted great attention in recent years. Compared with two-stage approaches that locate scene texts in the first stage and recognize them in the second stage, the advantages of joint location and recognition training are not fully explored. In this paper, we present an ADaptive RoI-Align based transformer for end-to-end Text Spotting (ADATS), which simultaneously locates and recognizes text with a single forward pass. By employing an Adaptive RoI-Align, the text features are extracted from the feature extraction network with the original aspect ratio, such that less information is lost during the alignment of arbitrarily-shaped scene text. Attention-based segmentation and recognition heads allow us to simultaneously optimize detection and recognition. Experiments on ICDAR 2015, MSRA-TD500, Total-Text, and CTW1500 demonstrate the effectiveness of our method.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 2023 IEEE International Conference on Multimedia and Expo, ICME 2023 ; Conference date: 10-07-2023 Through 14-07-2023},
}