% Conference paper from the GameSec 2018 proceedings (Lecture Notes in Computer Science).
% The funding acknowledgment and copyright line from the original export are stored in
% the non-standard fields "funding" and "copyright" so they are not printed as a note
% in the rendered reference; the conference name and dates are stored in
% "eventtitle" and "eventdate".
@inproceedings{28f20b3a7250458fa987761853c0955d,
  author     = {Ma, Yuzhe and Jun, Kwang Sung and Li, Lihong and Zhu, Xiaojin},
  title      = {Data poisoning attacks in contextual bandits},
  booktitle  = {Decision and Game Theory for Security - 9th International Conference, {GameSec} 2018, Proceedings},
  editor     = {Bushnell, Linda and Poovendran, Radha and Basar, Tamer},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer-Verlag},
  year       = {2018},
  pages      = {186--204},
  doi        = {10.1007/978-3-030-01554-1_11},
  isbn       = {9783030015534},
  language   = {English (US)},
  eventtitle = {9th International Conference on Decision and Game Theory for Security, {GameSec} 2018},
  eventdate  = {2018-10-29/2018-10-31},
  keywords   = {Adversarial attack, Contextual bandit, Data poisoning},
  abstract   = {We study offline data poisoning attacks in contextual bandits, a class of reinforcement learning problems with important applications in online recommendation and adaptive medical treatment, among others. We provide a general attack framework based on convex optimization and show that by slightly manipulating rewards in the data, an attacker can force the bandit algorithm to pull a target arm for a target contextual vector. The target arm and target contextual vector are both chosen by the attacker. That is, the attacker can hijack the behavior of a contextual bandit. We also investigate the feasibility and the side effects of such attacks, and identify future directions for defense. Experiments on both synthetic and real-world data demonstrate the efficiency of the attack algorithm.},
  funding    = {Acknowledgment. This work is supported in part by NSF 1545481, 1704117, 1623605, 1561512, and the MADLab AF Center of Excellence FA9550-18-1-0166.},
  copyright  = {{\textcopyright} 2018, Springer Nature Switzerland AG.},
}