% Conference paper in the ER 2001 proceedings (Lecture Notes in Computer Science).
% The citation key looks like an exporter-generated identifier; it is kept unchanged
% so that existing citations of this entry keep resolving.
% Fields unknown to classic BibTeX styles (eventtitle, eventdate, copyright, abstract)
% are ignored there and only carry metadata; biblatex understands eventtitle/eventdate.
@inproceedings{7ad8902faa074ecf812e828c7287a348,
  author     = {Embley, David W. and Ng, Yiu Kai and Xu, Li},
  title      = {Recognizing Ontology-Applicable Multiple-Record {Web} Documents},
  booktitle  = {Conceptual Modeling --- {ER} 2001},
  editor     = {Kunii, Hideko S. and Jajodia, Sushil and S{\o}lvberg, Arne},
  series     = {Lecture Notes in Computer Science},
  volume     = {2224},
  publisher  = {Springer-Verlag},
  address    = {Berlin, Heidelberg},
  year       = {2001},
  pages      = {555--570},
  isbn       = {3-540-42866-6},
  doi        = {10.1007/3-540-45581-7_41},
  language   = {English (US)},
  eventtitle = {20th International Conference on Conceptual Modeling, {ER} 2001},
  eventdate  = {2001-11-27/2001-11-30},
  copyright  = {{\textcopyright} Springer-Verlag Berlin Heidelberg 2001},
  abstract   = {Automatically recognizing which Web documents are ``of interest'' for some specified application is non-trivial. As a step toward solving this problem, we propose a technique for recognizing which multiple record Web documents apply to an ontologically specified application. Given the values and kinds of values recognized by an ontological specification in an unstructured Web document, we apply three heuristics: (1) a density heuristic that measures the percent of the document that appears to apply to an application ontology, (2) an expected-value heuristic that compares the number and kind of values found in a document to the number and kind expected by the application ontology, and (3) a grouping heuristic that considers whether the values of the document appear to be grouped as application-ontology records. Then, based on machine-learned rules over these heuristic measurements, we determine whether a Web document is applicable for a given ontology. Our experimental results show that we have been able to achieve over 90\% for both recall and precision, with an F-measure of about 95\%.},
}