% LREC 2010 conference paper on evaluating corpus representativeness for Maltese.
% Names are stored in "Last, First" form; words that must keep their capitals
% under sentence-casing styles are brace-protected. No "series" field is given:
% the export had only repeated the proceedings title (see booktitle) there.
@inproceedings{2fb7946769514fb8ba2dd2f03ee4d961,
  author    = {Francom, Jerid and {La Cross}, Amy and Ussishkin, Adam P.},
  title     = {How specialized are specialized corpora? {Behavioral} evaluation of corpus representativeness for {Maltese}},
  booktitle = {Proceedings of the 7th International Conference on Language Resources and Evaluation, {LREC} 2010},
  editor    = {Tapias, Daniel and Russo, Irene and Hamon, Olivier and Piperidis, Stelios and Calzolari, Nicoletta and Choukri, Khalid and Mariani, Joseph and Mazo, Helene and Maegaard, Bente and Odijk, Jan and Rosner, Mike},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2010},
  pages     = {421--427},
  language  = {English (US)},
  note      = {7th International Conference on Language Resources and Evaluation, LREC 2010; Conference date: 17-05-2010 Through 23-05-2010},
  abstract  = {In this paper we bring to light a novel intersection between corpus linguistics and behavioral data that can be employed as an evaluation metric for resources for low-density languages, drawing on well-established psycholinguistic factors. Using the low-density language Maltese as a test case, we highlight the challenges that face researchers developing resources for languages with sparsely available data and identify a key empirical link between corpus and psycholinguistic research as a tool to evaluate corpus resources. Specifically, we compare two robust variables identified in the psycholinguistic literature: word frequency (as measured in a corpus) and word familiarity (as measured in a rating task). We then use three statistical methods to evaluate these comparisons. This research provides a multidisciplinary approach to corpus development and evaluation, in particular for less-resourced languages that lack a wide access to diverse language data.},
}