% Journal article record (Machine Learning 38(3), 2000). The citation key is an export-generated hash and is kept as-is so existing \cite commands still resolve. Given names are left unbraced so styles can abbreviate each initial; the hash sign in the contract number inside the note field is escaped because that field is typeset as LaTeX text.
@article{a5e06b0188404aa891ab5013b06c0abd,
  author    = {Jensen, David D. and Cohen, Paul R.},
  title     = {Multiple comparisons in induction algorithms},
  journal   = {Machine Learning},
  year      = {2000},
  volume    = {38},
  number    = {3},
  pages     = {309--338},
  doi       = {10.1023/A:1007631014630},
  issn      = {0885-6125},
  publisher = {Springer Netherlands},
  language  = {English (US)},
  abstract  = {A single mechanism is responsible for three pathologies of induction algorithms: attribute selection errors, overfitting, and oversearching. In each pathology, induction algorithms compare multiple items based on scores from an evaluation function and select the item with the maximum score. We call this a multiple comparison procedure (MCP). We analyze the statistical properties of MCPs and show how failure to adjust for these properties leads to the pathologies. We also discuss approaches that can control pathological behavior, including Bonferroni adjustment, randomization testing, and cross-validation.},
  note      = {Funding Information: The authors wish to thank Tim Oates, Paul Utgoff, Gunnar Blix, Warren Greiff, and David Hand for comments on drafts of this paper. This research is supported by DARPA/Rome Laboratory under contract No. \#F30602-93-C-0100. The U.S. Government is authorized to reproduce and distribute reprints for governmental purposes notwithstanding any copyright notation hereon. The views and conclusions contained herein are those of the authors and should not be interpreted as necessarily representing the official policies or endorsements either expressed or implied, of the Defense Advanced Research Projects Agency, Rome Laboratory or the U.S. Government.},
}