% Workshop paper in the JSSPP 2023 revised selected papers volume (key kept as exported).
% NOTE(review): "address" holds a country rather than the publisher's city, and "language"
% is not a babel language name -- confirm both against the publisher record before relying on them.
@inproceedings{5bf4aa5e12ae40bd9da783fa1d2c73d1,
  author    = {Hall, Jason and Lathi, Arjun and Lowenthal, David K. and Patki, Tapasya},
  title     = {Evaluating the Potential of Coscheduling on High-Performance Computing Systems},
  booktitle = {Job Scheduling Strategies for Parallel Processing -- 26th Workshop, {JSSPP} 2023, Revised Selected Papers},
  editor    = {Klus{\'a}{\v c}ek, Dalibor and Corbal{\'a}n, Julita and Rodrigo, Gonzalo P.},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2023},
  pages     = {155--172},
  doi       = {10.1007/978-3-031-43943-8_8},
  isbn      = {9783031439421},
  language  = {English (US)},
  keywords  = {coscheduling, high-performance computing},
  abstract  = {Modern high-performance computing (HPC) system designs have converged to heavyweight nodes with growing numbers of processors. If schedulers on these systems allocate nodes in an exclusive and dedicated manner, many HPC applications and scientific workflows will be unable to fully utilize and benefit from such hardware. This is because at such extreme scale, it will be difficult for modern HPC applications to utilize all of the node-level resources on these systems. In this paper, we investigate the potential of moving away from dedicated node allocation and instead using intelligent coscheduling---where multiple jobs can share node-level resources---to improve node utilization and therefore job turnaround time. We design and implement a coscheduling simulator, and, using traces from a high-end HPC cluster with 100K jobs and 1158 nodes, demonstrate that coscheduling can improve average turnaround times by up to 18\% when compared to easy backfilling. Our results indicate that coscheduling has the potential to be a more efficient way to schedule jobs on high-end machines in both turnaround time and system and component utilization.},
  note      = {Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 26th workshop on Job Scheduling Strategies for Parallel Processing, JSSPP 2023 ; Conference date: 19-05-2023 Through 19-05-2023},
}