@comment{Conference paper, ICCAC 2017. The proceedings title is given once, in
  `booktitle`. Event name/dates, funding acknowledgement and copyright line are
  kept in dedicated fields instead of `note`, so that bibliography styles do not
  print them verbatim. `funding` and `copyright` are non-standard fields that
  styles ignore; `eventtitle`/`eventdate` are biblatex fields and are ignored by
  classic BibTeX styles.}
@inproceedings{e3f551647bdd4324b9db292d790deaf0,
  author     = {Kumbhare, Nirmal and Tunc, Cihan and MacHovec, Dylan and Akoglu, Ali and Hariri, Salim and Siegel, Howard Jay},
  title      = {Value Based Scheduling for Oversubscribed Power-Constrained Homogeneous {HPC} Systems},
  booktitle  = {Proceedings - 2017 {IEEE} International Conference on Cloud and Autonomic Computing, {ICCAC} 2017},
  pages      = {120--130},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2017},
  month      = oct,
  day        = {9},
  doi        = {10.1109/ICCAC.2017.19},
  language   = {English (US)},
  eventtitle = {4th {IEEE} International Conference on Cloud and Autonomic Computing, {ICCAC} 2017},
  eventdate  = {2017-09-18/2017-09-22},
  keywords   = {HPC, High Performance Computing, heuristics, idle injection, power aware, power capping, scheduling, scientific, utility, value based, value function},
  abstract   = {Power-aware scheduling has become a critical research thrust for deploying exascale High Performance Computing (HPC) systems with limited power budget. Time-varying pricing of electricity with respect to the market demand and dynamic HPC workloads can lead to unpredictable operational cost, which complicates the scheduling decisions further. For an oversubscribed HPC system, value based scheduling heuristics have been shown to be a more productive option for scheduling time-constrained tasks over priority and deadline based heuristics. However, oversubscribed HPC systems have higher probability of exceeding the power constraints. Earlier studies on value based heuristics do not take power constraints into account during scheduling decisions. In this study, we propose a methodology for deriving task-specific power-execution time models. These models are derived by interpolating the execution time and power consumption measurements over a configuration space parameterized with pairs of dynamic voltage frequency scaling and forced idleness values. We then propose two power-aware value based heuristics, which utilize those models for power capping the nodes and making resource allocation decisions in an oversubscribed homogeneous HPC system. We compare their performance with traditional value based heuristics under a defined power constraint on a real system using different synthetic traces of scientific computing routines. We show that, as power constraints become tighter, the proposed heuristics significantly outperform earlier heuristics in terms of value earning of the HPC system. We also compare the task completion percentage of proposed heuristics and relate the completion percentage with value earnings of the heuristics.},
  funding    = {This work is partly supported by National Science Foundation (NSF) research projects NSF CNS-1624668 and CCF-1302693.},
  copyright  = {{\textcopyright} 2017 IEEE.},
}