% MathAlign paper, LREC 2020 proceedings (pp. 2204--2212).
% The citation key is the original export identifier and is kept so existing citations still resolve.
% No series field: the exported value merely repeated booktitle and would print the proceedings title twice.
@inproceedings{99d6ac50e2f449859f92c3258b56fb39,
  author    = {Alexeeva, Maria and Sharp, Rebecca and Valenzuela-Esc{\'a}rcega, Marco A. and Kadowaki, Jennifer and Pyarelal, Adarsh and Morrison, Clayton},
  title     = {{MathAlign}: Linking formula identifiers to their contextual natural language descriptions},
  booktitle = {{LREC} 2020 - 12th International Conference on Language Resources and Evaluation, Conference Proceedings},
  editor    = {Calzolari, Nicoletta and Bechet, Frederic and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2020},
  pages     = {2204--2212},
  language  = {English (US)},
  keywords  = {Corpus creation, Machine reading, Math information retrieval, Relation extraction, Tool creation},
  abstract  = {Extending machine reading approaches to extract mathematical concepts and their descriptions is useful for a variety of tasks, ranging from mathematical information retrieval to increasing accessibility of scientific documents for the visually impaired. This entails segmenting mathematical formulae into identifiers and linking them to their natural language descriptions. We propose a rule-based approach for this task, which extracts {\LaTeX} representations of formula identifiers and links them to their in-text descriptions, given only the original PDF and the location of the formula of interest. We also present a novel evaluation dataset for this task, as well as the tool used to create it. The data and the source code are open source and are available at https://osf.io/bdxmr/ and https://github.com/ml4ai/automates, respectively.},
  note      = {Publisher Copyright: {\textcopyright} European Language Resources Association (ELRA), licensed under CC-BY-NC; 12th International Conference on Language Resources and Evaluation, LREC 2020 ; Conference date: 11-05-2020 Through 16-05-2020},
}