BibTeX format
% Journal article entry. Given-name initials are written separately ("A. L.", not "AL")
% so that BibTeX treats each one as its own initial instead of a single name token.
@article{Luporini:2015:10.1145/2687415,
  author  = {Luporini, F. and Varbanescu, A. L. and Rathgeber, F. and Bercea, G.-T. and Ramanujam, J. and Ham, D. A. and Kelly, P. H. J.},
  doi     = {10.1145/2687415},
  issn    = {1544-3566},
  journal = {ACM Transactions on Architecture and Code Optimization},
  pages   = {1--25},
  title   = {Cross-Loop Optimization of Arithmetic Intensity for Finite Element Local Assembly},
  url     = {https://doi.org/10.1145/2687415},
  volume  = {11},
  year    = {2015}
}
RIS format (EndNote, RefMan)
TY  - JOUR
AB  - We study and systematically evaluate a class of composable code transformations that improve arithmetic intensity in local assembly operations, which represent a significant fraction of the execution time in finite element methods. Their performance optimization is indeed a challenging issue. Even though affine loop nests are generally present, the short trip counts and the complexity of mathematical expressions, which vary among different problems, make it hard to determine an optimal sequence of successful transformations. Our investigation has resulted in the implementation of a compiler (called COFFEE) for local assembly kernels, fully integrated with a framework for developing finite element methods. The compiler manipulates abstract syntax trees generated from a domain-specific language by introducing domain-aware optimizations for instruction-level parallelism and register locality. Eventually, it produces C code including vector SIMD intrinsics. Experiments using a range of real-world finite element problems of increasing complexity show that significant performance improvement is achieved. The generality of the approach and the applicability of the proposed code transformations to other domains is also discussed.
AU  - Luporini,F
AU  - Varbanescu,AL
AU  - Rathgeber,F
AU  - Bercea,G-T
AU  - Ramanujam,J
AU  - Ham,DA
AU  - Kelly,PHJ
DO  - 10.1145/2687415
EP  - 25
PY  - 2015///
SN  - 1544-3566
SP  - 1
TI  - Cross-Loop Optimization of Arithmetic Intensity for Finite Element Local Assembly
T2  - ACM Transactions on Architecture and Code Optimization
UR  - http://dx.doi.org/10.1145/2687415
UR  - http://hdl.handle.net/10044/1/24006
VL  - 11
ER  - 