% Conference paper from the IPAW 2016 proceedings (LNCS series).
% Names are stored as "Last, First"; acronyms that must keep their case are
% brace-protected. The note field keeps the exporter's funding, copyright and
% conference-date text verbatim.
@inproceedings{a1d4beac95d146809fee6a2830e26c74,
  author    = {Chen, Peng and Evans, Tom and Plale, Beth},
  editor    = {Glavic, Boris and Mattoso, Marta},
  title     = {Analysis of memory constrained live provenance},
  booktitle = {Provenance and Annotation of Data and Processes - 6th International Provenance and Annotation Workshop, {IPAW} 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  year      = {2016},
  pages     = {42--54},
  doi       = {10.1007/978-3-319-40593-3_4},
  isbn      = {9783319405926},
  language  = {English (US)},
  keywords  = {Agent-Based model, Live data provenance, Stream processing},
  abstract  = {We conjecture that meaningful analysis of large-scale provenance can be preserved by analyzing provenance data in limited memory while the data is still in motion; that the provenance needs not be fully resident before analysis can occur. As a proof of concept, this paper defines a stream model for reasoning about provenance data in motion for Big Data provenance. We propose a novel streaming algorithm for the backward provenance query, and apply it to the live provenance captured from agent-based simulations. The performance test demonstrates high throughput, low latency and good scalability, in a distributed stream processing framework built on Apache Kafka and Spark Streaming.},
  note      = {Funding Information: This work is funded in part by the National Science Foundation under award number 1360463. Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2016.; 6th International Provenance and Annotation Workshop, IPAW 2016 ; Conference date: 07-06-2016 Through 08-06-2016},
}