% Benjamin and Chen, IEEE ISI 2016, pp. 205--207: conference paper on grouping
% participants of multilingual cybercriminal forums by language.
% Non-standard fields (abstract, keywords, day, language) are kept for tooling;
% standard BibTeX styles ignore them.
@inproceedings{80497836041b43ddbdae13d39f858fec,
  author    = {Benjamin, Victor and Chen, Hsinchun},
  title     = {Identifying language groups within multilingual cybercriminal forums},
  editor    = {Mao, Wenji and Wang, G. Alan and Zhou, Lina and Kaati, Lisa},
  booktitle = {{IEEE} International Conference on Intelligence and Security Informatics},
  series    = {{IEEE} International Conference on Intelligence and Security Informatics: Cybersecurity and Big Data, {ISI} 2016},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {205--207},
  year      = {2016},
  month     = nov,
  day       = {15},
  doi       = {10.1109/ISI.2016.7745471},
  language  = {English (US)},
  keywords  = {Cybercriminal community, Cybersecurity, Language modeling, Multilingual, Neural network},
  abstract  = {Online cybercriminal communities exist in various geopolitical regions, including America, China, Russia, and more. Some multilingual forums exist where cybercriminals of differing geopolitical origin interact and exchange hacking knowledge and cybercriminal assets. Researchers can study such forums to better understand the global cybercriminal supply chain and cybercrime trends. However, little work has focused on identifying members of different language groups and geopolitical origin within such forums. One challenge is the necessity of a technique that scales across multiple languages. We are motivated to explore computational techniques that support automated and scalable categorization of cybercriminal forum participants into varying language groups. In particular, we make use of Paragraph Vectors, a state-of-the-art neural network language model to generate fixed-length vector representations (i.e., document embeddings) of messages posted by forum participants. Results indicate Paragraph Vectors outperforms traditional n-gram frequency approaches for generating document embeddings that are useful for clustering cybercriminals into language groups.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 14th IEEE International Conference on Intelligence and Security Informatics, ISI 2016 ; Conference date: 28-09-2016 Through 30-09-2016},
}