% Conference paper (NLDB 2017, Springer LNCS): automated vs. manual lexicon
% construction for classifying ASD case status from electronic health records.
% LaTeX specials in the abstract are escaped so styles that print it compile.
@inproceedings{d435e551186842c3819f48b1b81e37f1,
  author    = {Leroy, Gondy and Gu, Yang and Pettygrove, Sydney and Kurzius-Spencer, Margaret},
  title     = {Automated Lexicon and Feature Construction Using Word Embedding and Clustering for Classification of {ASD} Diagnoses Using {EHR}},
  editor    = {Frasincar, Flavius and Ittoo, Ashwin and M{\'e}tais, Elisabeth and Nguyen, {Le Minh}},
  booktitle = {Natural Language Processing and Information Systems -- 22nd International Conference on Applications of Natural Language to Information Systems, {NLDB} 2017, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer-Verlag},
  year      = {2017},
  pages     = {34--37},
  doi       = {10.1007/978-3-319-59569-6_4},
  isbn      = {9783319595689},
  language  = {English (US)},
  keywords  = {Autism spectrum disorders, Classification, Clustering, EHR, Electronic health records, NLP, Natural language processing, Word embedding},
  abstract  = {Using electronic health records of children evaluated for Autism Spectrum Disorders, we are developing a decision support system for automated diagnostic criteria extraction and case classification. We manually created 92 lexicons which we tested as features for classification and compared with features created automatically using word embedding. The expert annotations used for manual lexicon creation provided seed terms that were expanded with the 15 most similar terms (Word2Vec). The resulting 2,200 terms were clustered in 92 clusters parallel to the manually created lexicons. We compared both sets of features to classify case status with a FF{\textbackslash}BP neural network (NN) and C5.0 decision tree. For manually created lexicons, classification accuracy was 76.92\% for the NN and 84.60\% for C5.0. For the automatically created lexicons, accuracy was 79.78\% for the NN and 86.81\% for C5.0. Automated lexicon creation required a much shorter development time and brought similarly high quality outcomes.},
  note      = {Funding Information: The EHR were collected by the Centers for Disease Control and Prevention Autism and Developmental Disabilities Monitoring (ADDM) Network. Pettygrove and Kurzius-Spencer received support from CDC Cooperative Agreement Number 5UR3/DD000680. The conclusions presented here are those of the authors and do not represent the official position of the Centers for Disease Control and Prevention. Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 22nd International Conference on Applications of Natural Language to Information Systems, NLDB 2017 ; Conference date: 21-06-2017 Through 23-06-2017},
}