%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Please use any of the following publications to reference MAGMA, as related to
%    1. MAGMA General
%    2. MAGMA BLAS
%    3. clMAGMA (OpenCL)
%    4. MAGMA MIC (Xeon Phi)
%    5. MAGMA Dynamic (CUDA + OpenCL + Xeon Phi ...)
%    6. MAGMA Sparse
%    7. MAGMA Batched
%    8. MAGMA Embedded
%    9. MAGMA Autotuning
%   10. MAGMA Distributed
%   11. MagmaDNN
%   12. hipMAGMA (HIP for AMD GPUs)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 1. General MAGMA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@article{tdb10,
  title     = {Towards dense linear algebra for hybrid {GPU} accelerated manycore systems},
  author    = {Stanimire Tomov and Jack Dongarra and Marc Baboulin},
  booktitle = {Parallel Matrix Algorithms and Applications},
  doi       = {10.1016/j.parco.2009.12.005},
  issn      = {0167-8191},
  journal   = {Parallel Computing},
  month     = jun,
  number    = {5--6},
  pages     = {232--240},
  posted-at = {2010-12-17 09:48:58},
  priority  = {2},
  volume    = {36},
  year      = {2010}
}

@inproceedings{tnld10,
  title     = {Dense Linear Algebra Solvers for Multicore with {GPU} Accelerators},
  author    = {Stanimire Tomov and Rajib Nath and Hatem Ltaief and Jack Dongarra},
  address   = {Atlanta, GA},
  booktitle = {Proc. of the IEEE IPDPS'10},
  month     = apr # " 19--23",
  doi       = {10.1109/IPDPSW.2010.5470941},
  pages     = {1--8},
  publisher = {IEEE Computer Society},
  year      = {2010}
}

@incollection{dghklty14,
  title     = {Accelerating Numerical Dense Linear Algebra Calculations with {GPUs}},
  author    = {Jack Dongarra and Mark Gates and Azzam Haidar and Jakub Kurzak and Piotr Luszczek and Stanimire Tomov and Ichitaro Yamazaki},
  booktitle = {Numerical Computations with GPUs},
  pages     = {1--26},
  year      = {2014},
  publisher = {Springer}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 2. MAGMA BLAS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@article{ntd10,
  title      = {An Improved {MAGMA} {GEMM} For {Fermi} Graphics Processing Units},
  author     = {Rajib Nath and Stanimire Tomov and Jack Dongarra},
  journal    = {Int. J. High Perform. Comput. Appl.},
  issue_date = {November 2010},
  volume     = {24},
  number     = {4},
  month      = nov,
  year       = {2010},
  issn       = {1094-3420},
  pages      = {511--515},
  numpages   = {5},
  url        = {http://dx.doi.org/10.1177/1094342010385729},
  doi        = {10.1177/1094342010385729},
  acmid      = {1889710},
  publisher  = {Sage Publications, Inc.},
  address    = {Thousand Oaks, CA, USA}
}

@inproceedings{ntd10_vecpar,
  title     = {Accelerating {GPU} Kernels for Dense Linear Algebra},
  author    = {Rajib Nath and Stanimire Tomov and Jack Dongarra},
  address   = {Berkeley, CA},
  booktitle = {Proceedings of the 2009 International Meeting on High Performance Computing for Computational Science, VECPAR'10},
  month     = jun # " 22--25",
  publisher = {Springer},
  year      = {2010}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 3.
% clMAGMA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{hcydglt14,
  title     = {Performance and Portability with {OpenCL} for Throughput-Oriented {HPC} Workloads Across Accelerators, Coprocessors, and Multicore Processors},
  author    = {Azzam Haidar and Chongxiao Cao and Ichitaro Yamazaki and Jack Dongarra and Mark Gates and Piotr Luszczek and Stanimire Tomov},
  booktitle = {5th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA 14)},
  year      = {2014},
  month     = nov,
  publisher = {IEEE},
  address   = {New Orleans, LA},
  doi       = {10.1109/ScalA.2014.8}
}

@article{dwltpd12,
  title      = {From {CUDA} to {OpenCL}: Towards a Performance-portable Solution for Multi-platform {GPU} Programming},
  author     = {Peng Du and Rick Weber and Piotr Luszczek and Stanimire Tomov and Gregory Peterson and Jack Dongarra},
  journal    = {Parallel Computing},
  issue_date = {August, 2012},
  volume     = {38},
  number     = {8},
  month      = aug,
  year       = {2012},
  issn       = {0167-8191},
  pages      = {391--407},
  numpages   = {17},
  url        = {http://dx.doi.org/10.1016/j.parco.2011.10.002},
  doi        = {10.1016/j.parco.2011.10.002},
  acmid      = {2318445},
  publisher  = {Elsevier Science Publishers B. V.},
  address    = {Amsterdam, The Netherlands}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 4. MAGMA MIC
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@article{hdkgltj15,
  title   = {{HPC} Programming on {Intel} Many-Integrated-Core Hardware with {MAGMA} Port to {Xeon Phi}},
  author  = {Azzam Haidar and Jack Dongarra and Khairul Kabir and Mark Gates and Piotr Luszczek and Stanimire Tomov and Yulu Jia},
  journal = {Scientific Programming},
  volume  = {23},
  year    = {2015},
  month   = jan,
  issn    = {1058-9244},
  doi     = {10.3233/SPR-140404}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 5.
% MAGMA Dynamic
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{hcyltkd14,
  title     = {Unified Development for Mixed {Multi-GPU} and Multi-coprocessor Environments Using a Lightweight Runtime Environment},
  author    = {Azzam Haidar and Chongxiao Cao and Asim Yarkhan and Piotr Luszczek and Stanimire Tomov and Khairul Kabir and Jack Dongarra},
  booktitle = {Proceedings of the 2014 IEEE 28th International Parallel and Distributed Processing Symposium},
  series    = {IPDPS '14},
  year      = {2014},
  isbn      = {978-1-4799-3800-1},
  pages     = {491--500},
  numpages  = {10},
  url       = {http://dx.doi.org/10.1109/IPDPS.2014.58},
  doi       = {10.1109/IPDPS.2014.58},
  acmid     = {2650549},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 6. MAGMA Sparse
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{tlydas14,
  title        = {Optimizing {Krylov} Subspace Solvers on Graphics Processing Units},
  author       = {Hartwig Anzt and William Sawyer and Stanimire Tomov and Piotr Luszczek and Ichitaro Yamazaki and Jack Dongarra},
  booktitle    = {Fourth International Workshop on Accelerators and Hybrid Exascale Systems (AsHES), IPDPS 2014},
  year         = {2014},
  month        = may,
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Phoenix, AZ}
}

@inproceedings{yathd14,
  title     = {Improving the performance of {CA-GMRES} on multicores with multiple {GPUs}},
  author    = {Ichitaro Yamazaki and Hartwig Anzt and Stanimire Tomov and Mark Hoemmen and Jack Dongarra},
  booktitle = {IPDPS 2014},
  year      = {2014},
  month     = may,
  publisher = {IEEE},
  address   = {Phoenix, AZ}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 7.
% MAGMA Batched
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{hdtld15,
  title        = {Framework for Batched and {GPU-resident} Factorization Algorithms to Block {Householder} Transformations},
  author       = {Azzam Haidar and Tingxing Dong and Stanimire Tomov and Piotr Luszczek and Jack Dongarra},
  booktitle    = {ISC High Performance},
  year         = {2015},
  month        = jul,
  publisher    = {Springer},
  organization = {Springer},
  address      = {Frankfurt, Germany}
}

@techreport{tensors,
  title       = {High-Performance Tensor Contractions for {GPUs}},
  author      = {A. Abdelfattah and M. Baboulin and V. Dobrev and J. Dongarra and C. Earl and J. Falcou and A. Haidar and I. Karlin and T. Kolev and I. Masliah and S. Tomov},
  type        = {University of Tennessee Computer Science Technical Report},
  number      = {UT-EECS-16-738},
  institution = {University of Tennessee},
  year        = {2016},
  month       = jan
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 8. MAGMA Embedded
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{htld15,
  title        = {{MAGMA} Embedded: Towards a Dense Linear Algebra Library for Energy Efficient Extreme Computing},
  author       = {Azzam Haidar and Stanimire Tomov and Piotr Luszczek and Jack Dongarra},
  booktitle    = {2015 IEEE High Performance Extreme Computing Conference (HPEC 15)},
  note         = {Best Paper Award},
  year         = {2015},
  month        = sep,
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Waltham, MA}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 9.
% MAGMA Autotuning
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{ldt09,
  title     = {A Note on Auto-tuning {GEMM} for {GPUs}},
  address   = {Baton Rouge, LA},
  author    = {Yinan Li and Jack Dongarra and Stanimire Tomov},
  booktitle = {Proceedings of the 2009 International Conference on Computational Science, ICCS'09},
  month     = may # " 25--27",
  publisher = {Springer},
  year      = {2009}
}

@article{ktd12,
  title   = {Autotuning {GEMM} Kernels for the {Fermi} {GPU}},
  author  = {Jakub Kurzak and Stanimire Tomov and Jack Dongarra},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = 23,
  number  = 11,
  pages   = {2045--2057},
  month   = nov,
  year    = 2012
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 10. MAGMA Distributed
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{hycltd15,
  title     = {Flexible Linear Algebra Development and Scheduling with {Cholesky} Factorization},
  author    = {Azzam Haidar and Asim YarKhan and Chongxiao Cao and Piotr Luszczek and Stanimire Tomov and Jack Dongarra},
  booktitle = {17th IEEE International Conference on High Performance Computing and Communications},
  year      = {2015},
  month     = aug,
  address   = {Newark, NJ}
}

@inproceedings{std12,
  title     = {Enabling and Scaling Matrix Computations on Heterogeneous {Multi-Core} and {Multi-GPU} Systems},
  author    = {Fengguang Song and Stanimire Tomov and Jack Dongarra},
  booktitle = {26th ACM International Conference on Supercomputing (ICS 2012)},
  year      = {2012},
  month     = jun,
  publisher = {ACM},
  address   = {San Servolo Island, Venice, Italy}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 11.
% MagmaDNN
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{ntbtwd19,
  title        = {{MagmaDNN}: Towards High-Performance Data Analytics and Machine Learning for Data-Driven Scientific Computing},
  booktitle    = {ISC High Performance},
  year         = {2019},
  month        = jun,
  publisher    = {Springer International Publishing},
  organization = {Springer International Publishing},
  address      = {Frankfurt, Germany},
  author       = {Daniel Nichols and Natalie-Sofia Tomov and Frank Betancourt and Stanimire Tomov and Kwai Wong and Jack Dongarra}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 12. hipMAGMA (HIP for AMD GPUs)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{BrownATD20,
  author    = {Cade Brown and Ahmad Abdelfattah and Stanimire Tomov and Jack J. Dongarra},
  title     = {Design, Optimization, and Benchmarking of Dense Linear Algebra Algorithms on {AMD} {GPUs}},
  booktitle = {2020 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2020, Waltham, MA, USA, September 22-24, 2020},
  pages     = {1--7},
  publisher = {{IEEE}},
  year      = {2020},
  url       = {https://doi.org/10.1109/HPEC43674.2020.9286214},
  doi       = {10.1109/HPEC43674.2020.9286214}
}