% Stahlman & Heidorn (2020), Proc. ASIS&T 57(1): LDA topic analysis of NSF astronomy grant funding.
% NOTE(review): the `note` field below looks like exporter-supplied text (an excerpt of the
% paper's introduction plus the publisher copyright line). Most styles print `note` verbatim in
% the bibliography; consider moving it to an unprinted field such as `annote` -- confirm with
% the file's owner before doing so.
@article{3c4068e84c384d2bb2b9c43d0165be0e,
  author    = {Stahlman, Gretchen R. and Heidorn, P. Bryan},
  title     = {Mapping the “long tail” of research funding: A topic analysis of {NSF} grant proposals in the division of astronomical sciences},
  journal   = {Proceedings of the Association for Information Science and Technology},
  year      = {2020},
  volume    = {57},
  number    = {1},
  doi       = {10.1002/pra2.276},
  issn      = {2373-9231},
  publisher = {John Wiley and Sons Inc.},
  language  = {English (US)},
  keywords  = {astronomy, data curation, long tail, research funding, topic analysis},
  abstract  = {“Long tail” data are considered to be smaller, heterogeneous, researcher-held data, which present unique data management and scholarly communication challenges. These data are presumably concentrated within relatively lower-funded projects due to insufficient resources for curation. To better understand the nature and distribution of long tail data, we examine National Science Foundation (NSF) funding patterns using Latent Dirichlet Allocation (LDA) and bibliographic data. We also introduce the concept of “Topic Investment” to capture differences in topics across funding levels and to illuminate the distribution of funding across topics. This study uses the discipline of astronomy as a case study, overall exploring possible associations between topic, funding level and research output, with implications for research policy and practice. We find that while different topics demonstrate different funding levels and publication patterns, dynamics predicted by the “long tail” theoretical framework presented here can be observed within NSF-funded topics in astronomy.},
  note      = {Funding Information: As a concept borrowed from economics, the “long tail” originally refers to niche markets, where some consumer goods are in high demand, but where obscure items may attract attention and become useful if readily available to be discovered (Anderson, 2007 ). In 2008, Heidorn demonstrated a financial distribution of research funded by the National Science Foundation that resembles the power-law distribution of long tail economics, with 20\% of funding in the “head” and 80\% in the “tail” (Heidorn, 2008). Long before the introduction of NSF's data management plan requirement for grant proposals in 2009, large NSF projects were required to plan for data management. Heidorn theorized that an abundance of inaccessible “dark data” corresponding to smaller projects are concentrated in the long tail of the funding distribution (2008, 2011) – data that could also become useful to other researchers if adequate resources, support and incentives for curation are available. Since that time, ubiquitous references to long tail data in the literature characterize this distribution in terms of size of data, demand for data, visibility and accessibility of data, and level of research competition with respect to data (Borgman, et al., 2016 ; Brooks, et al., 2016 ; Ferguson, et al., 2014 ; Heidorn, et al., 2015; Liang, et al., 2010 ; Malik \& Foster, 2012 ; Palmer, et al., 2007 ; Wallis, Rolando \& Borgman, 2013 ). The ubiquitousness with which this terminology has been deployed indicates that data management issues abound across disciplines, despite the existence of data management plans, disciplinary infrastructures, improved standards and increased awareness of the benefits of data sharing for individual researchers and research communities. Publisher Copyright: 83rd Annual Meeting of the Association for Information Science \& Technology October 25-29, 2020. Author(s) retain copyright, but ASIS\&T receives an exclusive publication license.},
}