% Conference paper in the "Lecture Notes in Networks and Systems" series.
% The citation key is kept exactly as exported so existing \cite commands keep working.
% Conference name, conference dates and the copyright line used to live in `note`
% (where styles print them verbatim); they now sit in `eventtitle`, `eventdate`
% and the ignored `copyright` field.
% NOTE(review): `publisher`/`address` follow the copyright line (Springer Nature
%   Switzerland AG); confirm against the imprint page of the volume.
% NOTE(review): the series `volume` number is not present in this record; add it
%   once confirmed.
% NOTE(review): `eventdate` assumes the exported dates were day-month-year.
@inproceedings{bcaeaf0283e543518cc9e117556f5357,
  author     = {Kauchak, David and Song, Vivien and Mishra, Prashant and Leroy, Gondy and Harber, Philip I. and Rains, Stephen and Hamre, John and Morgenstein, Nick},
  title      = {Automatic Generation of a Large Multiple-Choice Question-Answer Corpus},
  editor     = {Arai, Kohei},
  booktitle  = {Intelligent Systems and Applications: Proceedings of the 2024 Intelligent Systems Conference ({IntelliSys}) Volume 2},
  series     = {Lecture Notes in Networks and Systems},
  pages      = {55--72},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2024},
  doi        = {10.1007/978-3-031-66428-1_4},
  isbn       = {9783031664274},
  language   = {English (US)},
  eventtitle = {Intelligent Systems Conference, {IntelliSys} 2024},
  eventdate  = {2024-09-05/2024-09-06},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024},
  keywords   = {Corpus generation, Large language model applications, Text difficulty},
  abstract   = {Large corpora with fine-grained metrics for difficulty and understandability are a critical resource for developing algorithms and tools to create more informative content. We introduce a new approach for automatically generating a large corpus of health-related content with associated multiple-choice questions using Google{\textquoteright}s related questions and ChatGPT, including two new algorithms for generating potential wrong answers. We compare both the question quality as well as the suggested wrong answers using automated metrics and user studies. Overall, we find both algorithms generate reasonable questions that are complementary. Google questions use more accessible language and are easier to answer while ChatGPT questions appear easier, but are more difficult to answer and have better coverage over the entire text. For wrong answer generation, we find ChatGPT produces higher quality wrong answers that are more likely to be good distractors and are more closely related to the text content than our corpus-based approaches. We recommend both questions as options for studies with wrong answers generated by ChatGPT.},
}