% Book chapter: "Allele identification in assembled genomic sequence datasets",
% pp. 197--211 of "Data Production and Analysis in Population Genomics"
% (series: Methods in Molecular Biology).
% Typed as incollection so that both the chapter title and the booktitle are
% used; classic inbook styles ignore booktitle and would print the chapter
% title as if it were the book title.
% Author names use the "Last, First" form with unbraced given names so that
% styles can abbreviate them correctly (K. M. rather than K.).
% NOTE(review): the series volume number and the book editors are not recorded
% in this entry -- add volume/editor once confirmed against the publisher page.
@incollection{c2b2f10a3c844dd1b59389394788bc8c,
  author    = {Dlugosch, Katrina M. and Bonin, Aur{\'e}lie},
  title     = {Allele identification in assembled genomic sequence datasets},
  booktitle = {Data Production and Analysis in Population Genomics},
  series    = {Methods in Molecular Biology},
  publisher = {Humana Press Inc.},
  year      = {2012},
  pages     = {197--211},
  doi       = {10.1007/978-1-61779-870-2_12},
  isbn      = {9781617798696},
  language  = {English (US)},
  keywords  = {AllelePipe, Allelic variation, Gene duplication, Granularity, Maximum likelihood clustering, Next-generation sequencing, Paralogs, Single-linkage clustering, Transcriptome data},
  abstract  = {Allelic variation within species provides fundamental insights into the evolution and ecology of organisms, and information about this variation is becoming increasingly available in sequence datasets of multiple and/or outbred individuals. Unfortunately, identifying true allelic variants poses a number of challenges, given the presence of both sequencing errors and alleles from other closely related loci. We outline the key considerations involved in this process, including assessing the accuracy of allele resolution in sequence assembly, clustering of alleles within and among individuals, and identifying clusters that are most likely to correspond to true allelic variants of a single locus. Our focus is particularly on the case where alleles must be identified without a fully resolved reference genome, and where sequence depth information cannot be used to infer the putative number of loci sharing a sequence, such as in transcriptome or post-assembly datasets. Throughout, we provide information about publicly available tools to aid allele identification in such cases.},
}