% Conference paper: Confli-T5 (IEEE Big Data 2022).
% Non-bibliographic export metadata (funding, copyright) is kept in the custom
% fields `funding` and `copyright`, which standard styles do not print.
% `eventdate` holds the conference dates in ISO form; classic BibTeX ignores it.
@inproceedings{9468c501fec84dee8b7ba68cb1ae9f69,
  author    = {Parolin, Erick Skorupa and Hu, Yibo and Khan, Latifur and Brandt, Patrick T. and Osorio, Javier and D'Orazio, Vito},
  title     = {{Confli-T5}: An {AutoPrompt} Pipeline for Conflict Related Text Augmentation},
  booktitle = {Proceedings - 2022 {IEEE} International Conference on Big Data, Big Data 2022},
  editor    = {Tsumoto, Shusaku and Ohsawa, Yukio and Chen, Lei and {Van den Poel}, Dirk and Hu, Xiaohua and Motomura, Yoichi and Takagi, Takuya and Wu, Lingfei and Xie, Ying and Abe, Akihiro and Raghavan, Vijay},
  publisher = {{Institute of Electrical and Electronics Engineers Inc.}},
  pages     = {1906--1913},
  year      = {2022},
  eventdate = {2022-12-17/2022-12-20},
  doi       = {10.1109/BigData55660.2022.10020509},
  langid    = {american},
  keywords  = {CAMEO, classification, coding event data, conflict, generation, natural language processing, text augmentation},
  abstract  = {Recent advances in natural language processing (NLP) and Big Data technologies have been crucial for scientists to analyze political unrest and violence, prevent harm, and promote global conflict management. Government agencies and public security organizations have invested heavily in deep learning-based applications to study global conflicts and political violence. However, such applications involving text classification, information extraction, and other NLP-related tasks require extensive human efforts in annotating/labeling texts. While limited labeled data may drastically hurt the models' performance (over-fitting), large demands on annotation tasks may turn real-world applications impracticable. To address this problem, we propose Confli-T5, a prompt-based method that leverages the domain knowledge from existing political science ontology to generate synthetic but realistic labeled text samples in the conflict and mediation domain. Our model allows generating textual data from the ground up and employs our novel Double Random Sampling mechanism to improve the quality (coherency and consistency) of the generated samples. We conduct experiments over six standard datasets relevant to political science studies to show the superiority of Confli-T5. Our codes are publicly available.},
  funding   = {The research reported herein was supported in part by NSF awards DMS-1737978, DGE-2039542, OAC-1828467, OAC-1931541, and DGE-1906630, ONR awards N00014-17-1-2995 and N00014-20-1-2738, Army Research Office Contract No. W911NF2110032 and IBM faculty award (Research).},
  copyright = {{\textcopyright} 2022 IEEE.},
}