@inproceedings{204ebf8b1eb34f708cdbe05f3252fa1b,
  author    = {Chau, Michael and Chen, Hsinchun},
  title     = {Using content-based and link-based analysis in building vertical search engines},
  booktitle = {Digital Libraries},
  editor    = {Miao, Qihao and Lim, Ee-peng and Chen, Zhaoneng and Fu, Yuxi and Chen, Hsinchun and Fox, Edward},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {515--518},
  publisher = {Springer-Verlag},
  year      = {2005},
  isbn      = {9783540240303},
  doi       = {10.1007/978-3-540-30544-6_3},
  language  = {English (US)},
  abstract  = {This paper reports our research in the Web page filtering process in specialized search engine development. We propose a machine-learning-based approach that combines Web content analysis and Web structure analysis. Instead of a bag of words, each Web page is represented by a set of content-based and link-based features, which can be used as the input for various machine learning algorithms. The proposed approach was implemented using both a feedforward/backpropagation neural network and a support vector machine. An evaluation study was conducted and showed that the proposed approaches performed better than the benchmark approaches.},
  note      = {Funding Information: This project has been supported in part by the following grants: NSF Digital Library Initiative-2, ``High-performance Digital Library Systems: From Information Retrieval to Knowledge Management'' (IIS-9817473, Apr 1999-Mar 2002), NIH/NLM Grant (PI: H. Chen), ``UMLS Enhanced Dynamic Agents to Manage Medical Knowledge'' (1 R01 LM06919-1A1, Feb 2001-Jan 2004), and HKU Seed Funding for Basic Research, ``Using Content and Link Analysis in Developing Domain-specific Web Search Engines: A Machine Learning Approach'' (Feb 2004-Jul 2005). We also thank the medical experts who participated in the user studies. Publisher Copyright: {\textcopyright} Springer-Verlag Berlin Heidelberg 2004.; 7th International Conference on Asian Digital Libraries, ICADL 2004 ; Conference date: 13-12-2004 Through 17-12-2004},
}