cmake_minimum_required(VERSION 3.19)

# top-level project declaration, the VERSION also defines asgard_VERSION
project(asgard
        VERSION 0.9.0
        LANGUAGES CXX)

# release string, keep in sync with the pip-installer setup.py
set(ASGARD_RELEASE_INFO "${asgard_VERSION}")

# standard CMake modules used by the rest of this file
include(FetchContent)              # download external dependencies
include(GNUInstallDirs)            # standard install sub-folders, e.g., CMAKE_INSTALL_LIBDIR
include(CMakePackageConfigHelpers) # helpers to export the project dependencies

###############################################################################
## Set up the compiler and general global build options
###############################################################################

# Fall back to a Release build when neither a build type nor a list of
# configurations (multi-config generators) was provided, see
# https://blog.kitware.com/cmake-and-the-default-build-type/
if (NOT (CMAKE_BUILD_TYPE OR CMAKE_CONFIGURATION_TYPES))
  message (STATUS "Setting build type to 'Release' as none was specified.")
  set (CMAKE_BUILD_TYPE "Release"
       CACHE STRING "Choose the type of build." FORCE)
  # values offered in the cmake-gui drop-down menu
  set_property (CACHE CMAKE_BUILD_TYPE
                PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
endif ()

# if the user did not select an install prefix, install inside the build tree
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set_property (CACHE CMAKE_INSTALL_PREFIX
                PROPERTY VALUE "${CMAKE_CURRENT_BINARY_DIR}/asgard_install")
endif ()

# the rpath points to the final location of the installed libraries,
# under scikit-build (SKBUILD) that location is given by ASGARD_python_pip_path
if (SKBUILD)
  set (__asgard_install_prefix ${ASGARD_python_pip_path})
else ()
  set (__asgard_install_prefix ${CMAKE_INSTALL_PREFIX})
endif ()

# the same folder is appended to both the install and the build rpath
foreach (_asg_rpath_var CMAKE_INSTALL_RPATH CMAKE_BUILD_RPATH)
  list (APPEND ${_asg_rpath_var} "${__asgard_install_prefix}/${CMAKE_INSTALL_LIBDIR}")
endforeach ()

# user-facing build options, the defaults can be overridden with -D<name>=<value>
option(BUILD_SHARED_LIBS
       "Build the ASGarD library with dynamic or static linking"
       ON)

# testing, the long-tests switch is declared only when tests are built
option(ASGARD_BUILD_TESTS "Build tests for ASGarD" ON)
if (ASGARD_BUILD_TESTS)
  option(ASGARD_LONG_TESTS
         "Build the longer tests, disable if tests take too long"
         ON)
endif ()

# multi-threading
option(ASGARD_USE_OPENMP "Optional OpenMP multi-threading support" OFF)
# soft variant of the above, the python pip installer is less robust
# when dealing with many different build options
option(ASGARD_TRYUSE_OPENMP
       "Try to enable OpenMP but if missing then just build without it (used by pip install)"
       OFF)
set(ASGARD_TEST_OMP_NUM_THREADS ""
    CACHE STRING "Sets the OMP_NUM_THREADS variable in testing (only in testing)")

# GPU backends
option(ASGARD_USE_CUDA "Optional Nvidia/CUDA support" OFF)
option(ASGARD_USE_ROCM "Optional AMD/ROCm support" OFF)

# distributed memory
option(ASGARD_USE_MPI "Optional distributed computing support" OFF)

# MPI tuning knobs, declared only when MPI has been requested
if (ASGARD_USE_MPI)
  set (ASGARD_MPI_BCAST_THRESHOLD "4" CACHE STRING "Number of ranks to switch from Send/Recv to collective Bcast")
  set (ASGARD_MPI_REDUCE_THRESHOLD "4" CACHE STRING "Number of ranks to switch from Send/Recv to collective Reduce")
  set (ASGARD_TEST_MPI_OMP_NUM_THREADS "" CACHE STRING "Sets the OMP_NUM_THREADS variable in MPI testing (only in testing)")

  # GPU-Aware MPI needs MPI together with one GPU backend; this file rejects
  # CUDA and ROCm being enabled together, hence the condition is OR, with AND
  # the option could never be declared in a valid configuration
  if (ASGARD_USE_CUDA OR ASGARD_USE_ROCM)
    option (ASGARD_GPUMPI_DIRECT "Optional enable GPU-Aware MPI communication, make sure the MPI version supports GPU-Aware MPI" OFF)
  endif()
endif()

# ASGARD_GPUMPI_DIRECT may be set by an external script even without MPI,
# reset it in that case; the CACHE form of set() requires the type and the
# docstring before FORCE, otherwise CMake stops with an error
if (ASGARD_GPUMPI_DIRECT AND NOT ASGARD_USE_MPI)
  set (ASGARD_GPUMPI_DIRECT OFF CACHE BOOL "Optional enable GPU-Aware MPI communication, make sure the MPI version supports GPU-Aware MPI" FORCE)
endif()

# post-processing and I/O
option(ASGARD_USE_PYTHON
       "Optional Python tool for post-processing, plotting and quick prototyping"
       OFF)
option(ASGARD_USE_HIGHFIVE "Use the HighFive HDF5 header library for I/O" OFF)

# the default for building OpenBLAS can be switched on
# through the environment variable ASGARD_BUILD_OPENBLAS
set(_asgard_openblas_default OFF)
if ($ENV{ASGARD_BUILD_OPENBLAS})
  set(_asgard_openblas_default ON)
endif ()
option(ASGARD_BUILD_OPENBLAS "Download and build our own OpenBLAS" ${_asgard_openblas_default})
unset(_asgard_openblas_default)

option(ASGARD_BUILD_HDF5 "Download and build our own HDF5/HighFive" OFF)

option(ASGARD_USE_PCH "Enable precompiled header files." OFF)

# floating point precisions, the property provides the choices shown by cmake-gui
set(ASGARD_PRECISIONS "float;double"
    CACHE STRING "Select floating point precision, supported values are 'float', 'double' or 'float;double'")
set_property(CACHE ASGARD_PRECISIONS
             PROPERTY STRINGS "float" "double" "float\;double")

option(ASGARD_USE_TIMER "Enable the builtin profiling tool" ON)
option(ASGARD_FAST_FLAGS
       "Enable aggressive performance flags for this CPU, -mtune=native and -mfma"
       OFF)
option(ASGARD_ALWAYS_SAFE_STEP
       "Always enable the '-safe-step' flag, detects problems early with negligible cost"
       ON)
option(ASGARD_BUILD_DOCS "Build the documentation." OFF)

option(ASGARD_USE_GITINFO
       "Optional read git properties and build them into the library"
       ON)

# the flop counter is offered only together with the timer
if (ASGARD_USE_TIMER)
  option(ASGARD_USE_FLOPCOUNTER "Counts flops as part of the timing process" OFF)
endif ()

# extra flags appended to the global C++ flags, mostly for CI purposes
# NOTE(review): ASGARD_RECOMMENDED_FLAGS is not declared with option() in the
# visible part of this file, presumably it is passed with -D by scripts, while
# the declared option ASGARD_FAST_FLAGS describes similar flags -- confirm
# which of the two names is intended here
if (ASGARD_RECOMMENDED_FLAGS)
  string (APPEND CMAKE_CXX_FLAGS " -march=native -mtune=native -mfma")
endif ()

# the two GPU backends are mutually exclusive
if (ASGARD_USE_CUDA AND ASGARD_USE_ROCM)
  message(FATAL_ERROR "Cannot simultaneously use CUDA and ROCm, pick one only")
endif()

if (ASGARD_USE_CUDA OR ASGARD_USE_ROCM)
  # a GPU backend is enabled, at least one device must be allowed
  set (ASGARD_MAX_NUM_GPUS "1" CACHE STRING "Maximum number of GPU devices to use simultaneously")
  if (ASGARD_MAX_NUM_GPUS LESS 1)
    message(FATAL_ERROR "ASGARD_MAX_NUM_GPUS must be at least 1, but it is set to '${ASGARD_MAX_NUM_GPUS}'")
  endif()

  option(ASGARD_GPU_MEMGREEDY "Use a greedy approach with higher memory usage but also faster speed" ON)
else()
  # if neither CUDA nor ROCm are present, then never use GPUs or GPU-Aware things
  set (ASGARD_MAX_NUM_GPUS "0" CACHE STRING "Maximum number of GPU devices to use simultaneously" FORCE)
  if (ASGARD_GPUMPI_DIRECT) # if set by a script or by mistake
    # the CACHE form of set() requires the type and the docstring before FORCE,
    # the shorter 'CACHE FORCE' form is rejected by CMake with an error
    set (ASGARD_GPUMPI_DIRECT OFF CACHE BOOL "Optional enable GPU-Aware MPI communication, make sure the MPI version supports GPU-Aware MPI" FORCE)
  endif()
endif()

# HighFive I/O is switched on automatically when either the in-tree
# HDF5 build or the Python tools are requested
if (NOT ASGARD_USE_HIGHFIVE AND (ASGARD_BUILD_HDF5 OR ASGARD_USE_PYTHON))
  set (ASGARD_USE_HIGHFIVE ON CACHE BOOL "" FORCE)
endif ()

###############################################################################
# set the list of the PDEs that will be used, needed to configure files
# e.g., _asgard_doc_pdes_str is used in the example file
###############################################################################

# The documentation PDEs and the GPU PDEs are kept twice:
#  - the *_str variables hold a single space-separated string,
#    saved for the configuration of the examples cmake file
#  - the variables without the suffix hold the same names as a CMake list
# The internal (testing) PDEs are needed only as a regular list.
set (_asgard_doc_pdes_str "continuity_2d inputs_1d continuity diffusion spherical_diffusion elliptic robin_bc sinwav two_stream vplb sod_shock burgers bgk")
set (_asgard_gpu_pdes_str "burgers_gpu")

# switch from the string to the list format
set (_asgard_doc_pdes "${_asgard_doc_pdes_str}")
separate_arguments (_asgard_doc_pdes)

set (_asgard_gpu_pdes "${_asgard_gpu_pdes_str}")
separate_arguments (_asgard_gpu_pdes)

set (_asgard_int_pdes
     bound_internal
     interp_internal
     relaxation_internal
     varcoeff_internal
     misc_internal)

# the combined list, in the order: documentation, gpu, internal
list (APPEND _asgard_pdes
      ${_asgard_doc_pdes}
      ${_asgard_gpu_pdes}
      ${_asgard_int_pdes})

###############################################################################
## Pull in external support as needed
###############################################################################
# make the modules in contrib/ visible to find_package()
# (presumably the ASGARDLINALG find-module is located there -- confirm)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/contrib")
find_package(ASGARDLINALG REQUIRED)

# read the git properties of the source tree: hash, branch and date of the last commit
if (ASGARD_USE_GITINFO)
  find_package(Git)
  if (Git_FOUND)
    # hash of the last commit
    execute_process(
      COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -n 1
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE ASGARD_GIT_COMMIT_HASH)
    # name of the current branch, without the trailing new-line
    execute_process(
      COMMAND ${GIT_EXECUTABLE} branch --show-current
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE ASGARD_GIT_BRANCH
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    # author date of the last commit
    execute_process(
      COMMAND ${GIT_EXECUTABLE} log --pretty=format:%aD -n 1
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE ASGARD_GIT_COMMIT_SUMMARY)
  endif ()
endif ()

# handles the I/O logic, must be included after ASGARDLINALG
if (ASGARD_USE_HIGHFIVE)
  # NOTE(review): the original comment read "needed by older versions of C",
  # presumably the C language is required by the HDF5 detection -- confirm
  enable_language(C)
  # use the folder of this CMakeLists.txt, same as the module path for contrib,
  # CMAKE_SOURCE_DIR points to the wrong place when ASGarD is a sub-project
  include ("${CMAKE_CURRENT_SOURCE_DIR}/contrib/asgard_io.cmake")
endif()

# OpenMP: either a hard requirement (ASGARD_USE_OPENMP)
# or a best-effort attempt (ASGARD_TRYUSE_OPENMP)
if (ASGARD_USE_OPENMP)
  find_package (OpenMP REQUIRED)
elseif (ASGARD_TRYUSE_OPENMP AND NOT APPLE)
  # OpenMP has poor support on OSX platforms and is not attempted by default,
  # ASGARD_USE_OPENMP still explicitly enables OpenMP regardless of the platform
  find_package (OpenMP)
  if (NOT OpenMP_FOUND)
    message (WARNING "ASGarD recommends OpenMP for multi-threading but CMake could not find the OpenMP flags")
  else ()
    set (ASGARD_USE_OPENMP ON CACHE BOOL "OpenMP found and enabled" FORCE)
  endif ()
endif ()

# MPI is a hard requirement whenever it is requested
if (ASGARD_USE_MPI)
  find_package (MPI REQUIRED)
endif ()

# translate ASGARD_PRECISIONS into ASGARD_ENABLE_FLOAT / ASGARD_ENABLE_DOUBLE,
# must be done before Python so python_config.py can be properly set
if ("${ASGARD_PRECISIONS}" STREQUAL "" OR NOT ASGARD_PRECISIONS)
  message (FATAL_ERROR "ASGARD_PRECISIONS must be defined to either 'float', 'double' or 'float;double'")
endif ()

foreach (_asg_precision ${ASGARD_PRECISIONS})
  if ("${_asg_precision}" STREQUAL "double")
    set (ASGARD_ENABLE_DOUBLE ON)
  elseif ("${_asg_precision}" STREQUAL "float")
    set (ASGARD_ENABLE_FLOAT ON)
  else ()
    # anything other than the two supported names stops the configuration
    message (FATAL_ERROR "invalid precision ${_asg_precision}, supported types are 'float' and 'double'")
  endif ()
endforeach ()

# handle the python module
# when ASGARD_USE_PYTHON is enabled, this section
#  - finds the Python interpreter and selects the install folder for the module
#  - configures asgard_config.py twice, for the build tree and for the install tree
#  - adds copy rules that stage the python scripts and examples in the build tree
#  - configures and installs the shell helpers and the MATLAB files
if (ASGARD_USE_PYTHON)
    # the module cannot be used with a static library, stop early with a clear message
    if (NOT BUILD_SHARED_LIBS)
        message(FATAL_ERROR "The ASGarD python module requires shared libraries, set -DBUILD_SHARED_LIBS=ON")
    endif()
    find_package(Python 3.0 REQUIRED COMPONENTS Interpreter)

    # install sub-folder for the module, uses the major.minor version of the interpreter found above
    set(__asgard_pysubpath "${CMAKE_INSTALL_LIBDIR}/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/site-packages")
    if (SKBUILD)
    # scikit build compiles and install in one place, then moves the files to a different location
        if (ASGARD_osx_framework)
            # in this case the sub-folder does not carry the python version
            set(__asgard_pysubpath "${CMAKE_INSTALL_LIBDIR}/python/site-packages")
        endif()
        # the library path recorded in the installed config uses ASGARD_python_pip_path
        set(__asgard_final_path "${ASGARD_python_pip_path}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}asgard${CMAKE_SHARED_LIBRARY_SUFFIX}")
        set(_asgard_python_path "${CMAKE_INSTALL_PREFIX}/${__asgard_pysubpath}")
    else()
        # regular build, all those folders are the same
        set(__asgard_final_path "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}asgard${CMAKE_SHARED_LIBRARY_SUFFIX}")
        set(_asgard_python_path "${CMAKE_INSTALL_PREFIX}/${__asgard_pysubpath}")
    endif()

    # this value should be set to "ON", CMake accepts YES, ON or 1 in either upper or lower case
    # however, some of the ASGarD configured scripts (e.g., asgard-env.sh) use only "ON" as a valid value
    set(ASGARD_USE_PYTHON ON)

    # the python module should be available and working from both the build and install folders
    # hence, we do the configuration twice
    # first pass: the library path points into the build tree
    set(__pyasgard_libasgard_path__ "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}asgard${CMAKE_SHARED_LIBRARY_SUFFIX}")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_config.py"  "${CMAKE_CURRENT_BINARY_DIR}/asgard_config.py")

    # put the module and all testing files into the build tree
    # each file gets a copy rule and the outputs are collected in _pyasgard_buildstage
    foreach(_asg_pybuildfile asgard.py pyasgard_test.py sandbox.py)
        add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}"
                           COMMAND "${CMAKE_COMMAND}"
                           ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/${_asg_pybuildfile} ${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}
                           DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/python/${_asg_pybuildfile}"
                           COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR}/python/${_asg_pybuildfile}")
        list(APPEND _pyasgard_buildstage "${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}")
    endforeach()
    # the python examples are copied the same way and also installed from the source tree
    foreach(_asg_pybuildfile continuity_2d.py inputs_1d.py slideshow.py bgk.py)
        add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}"
                           COMMAND "${CMAKE_COMMAND}"
                           ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_pybuildfile} ${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}
                           DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_pybuildfile}"
                           COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_pybuildfile}")
        list(APPEND _pyasgard_buildstage "${CMAKE_CURRENT_BINARY_DIR}/${_asg_pybuildfile}")
        install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_pybuildfile}" DESTINATION share/asgard/examples)
    endforeach()
    # part of the default build (ALL), the target runs the copy rules defined above
    add_custom_target(asgard_python_testing ALL DEPENDS "${_pyasgard_buildstage}")

    # shell helpers, configured with @VAR@ substitution only and installed executable by the owner
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgardplot.sh"
                   "${CMAKE_CURRENT_BINARY_DIR}/asgardplot.sh" @ONLY)
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/asgardplot.sh"
            DESTINATION "${CMAKE_INSTALL_BINDIR}"
            PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE)

    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgardrun.sh"
                   "${CMAKE_CURRENT_BINARY_DIR}/asgardrun.sh" @ONLY)
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/asgardrun.sh"
            DESTINATION "${CMAKE_INSTALL_BINDIR}"
            PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE)

    # install the module without the testing files
    # second pass of the config: the library path now points to the installed library
    set(__pyasgard_libasgard_path__ "${__asgard_final_path}")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_config.py"  "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_config.py")
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_config.py" DESTINATION "${_asgard_python_path}")
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/asgard.py" DESTINATION "${_asgard_python_path}")

    # MATLAB section, always installed with python but it doesn't have to run
    # _asgardpy_exe_ holds the command line that launches asgard_matlab.py,
    # the .m files are configured twice: first with the build-tree command ...
    set(_asgardpy_exe_ "PYTHONPATH=\\\"\$PYTHONPATH:${CMAKE_CURRENT_BINARY_DIR}\\\" ${Python_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/asgard_matlab.py")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_file_stats.m" "${CMAKE_CURRENT_BINARY_DIR}/asgard_file_stats.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_plot1d.m" "${CMAKE_CURRENT_BINARY_DIR}/asgard_plot1d.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_plot2d.m" "${CMAKE_CURRENT_BINARY_DIR}/asgard_plot2d.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_evaluate.m" "${CMAKE_CURRENT_BINARY_DIR}/asgard_evaluate.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_cell_centers.m" "${CMAKE_CURRENT_BINARY_DIR}/asgard_cell_centers.m" @ONLY)
    # ... and then with the install-tree command, written to the pyinstall sub-folder
    set(_asgardpy_exe_ "PYTHONPATH=\\\"\$PYTHONPATH:${_asgard_python_path}\\\" ${Python_EXECUTABLE} ${CMAKE_INSTALL_PREFIX}/share/asgard/matlab/asgard_matlab.py")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_file_stats.m" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_file_stats.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_plot1d.m" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_plot1d.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_plot2d.m" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_plot2d.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_evaluate.m" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_evaluate.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_cell_centers.m" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_cell_centers.m" @ONLY)
    # only the .m files of the pyinstall folder are installed here
    install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/"
            DESTINATION share/asgard/matlab
            FILES_MATCHING PATTERN "*.m")

    # the MATLAB example is configured with the install-tree value of _asgardpy_exe_ set above,
    # one copy goes to the top of the build tree and one to the examples sub-folder, which is installed
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/examples/continuity_2d.m" "${CMAKE_CURRENT_BINARY_DIR}/example_continuity_2d.m" @ONLY)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/examples/continuity_2d.m" "${CMAKE_CURRENT_BINARY_DIR}/examples/continuity_2d.m" @ONLY)
    foreach(_asg_pybuildfile continuity_2d.m)
        install(FILES "${CMAKE_CURRENT_BINARY_DIR}/examples/${_asg_pybuildfile}" DESTINATION share/asgard/examples)
    endforeach()

    # asgard_matlab.py is also configured twice, _asgard_matlab_pypath_ is
    # the build folder for the build-tree copy and the module install folder for the installed copy
    set(_asgard_matlab_pypath_ "${CMAKE_CURRENT_BINARY_DIR}")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_matlab.py" "${CMAKE_CURRENT_BINARY_DIR}/asgard_matlab.py" @ONLY)
    set(_asgard_matlab_pypath_ "${_asgard_python_path}")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/asgard_matlab.py" "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_matlab.py" @ONLY)
    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/pyinstall/asgard_matlab.py" DESTINATION share/asgard/matlab)
endif()

# stage the input files used by the tests in the build folder
if (ASGARD_BUILD_TESTS)
  # inputs that live in the testing folder, only copied
  foreach (_asg_infile test_input1.txt test_input2.txt)
    add_custom_command (
      OUTPUT  "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}"
      COMMAND "${CMAKE_COMMAND}" -E copy
              "${CMAKE_CURRENT_SOURCE_DIR}/testing/${_asg_infile}"
              "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}"
      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/testing/${_asg_infile}"
      COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR}/testing/${_asg_infile}")
    list (APPEND _asg_testinputfiles "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}")
  endforeach ()

  # inputs that live in the examples folder, copied and also installed
  foreach (_asg_infile inputs_1d_1.txt inputs_1d_2.txt)
    add_custom_command (
      OUTPUT  "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}"
      COMMAND "${CMAKE_COMMAND}" -E copy
              "${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_infile}"
              "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}"
      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_infile}"
      COMMENT "Copying ${CMAKE_CURRENT_SOURCE_DIR}/examples/${_asg_infile}")
    list (APPEND _asg_testinputfiles "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}")

    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/${_asg_infile}"
             DESTINATION share/asgard/examples)
  endforeach ()

  # part of the default build, the target runs all of the copy rules above
  add_custom_target (asgard_test_inputs ALL DEPENDS "${_asg_testinputfiles}")
endif ()

# CUDA has to be enabled before libasgard is created
if (ASGARD_USE_CUDA)
  # ENV{CUDAARCHS} is used to set CMAKE_CUDA_ARCHITECTURES,
  # the fallback logic is needed only when the environment variable is missing
  if ("$ENV{CUDAARCHS}" STREQUAL "")
    if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
      # recent CMake, switch to "native"
      set (CMAKE_CUDA_ARCHITECTURES "native"
           CACHE STRING "Architecture for the CUDA device.")
    elseif (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
      # older CMake, the architecture must come from the user
      message (FATAL_ERROR
"If using CMake prior to 3.24 the user must specify either \
CMAKE_CUDA_ARCHITECTURES or environment variable CUDAARCHS \
and the architecture has to be set to 60 or newer, \
which is the minimum that enables double precision atomic operations. \
CMAKE_CUDA_ARCHITECTURES could be specified as empty or 'False', \
but then the appropriate CMAKE_CUDA_FLAGS must be set manually.")
    endif ()
  endif ()

  enable_language (CUDA)
  find_package (CUDAToolkit REQUIRED)

  # the launch parameters must be set before "asgard_build_info.hpp" is configured
  set (ASGARD_NUM_GPU_THREADS "1024"
       CACHE STRING "Number of threads for GPU launch kernels")
  set (ASGARD_NUM_GPU_BLOCKS "2000"
       CACHE STRING "Number of blocks for GPU launch kernels")
endif ()

# AMD/ROCm backend, enables the HIP language and finds rocsolver
if (ASGARD_USE_ROCM)
  if (CMAKE_VERSION VERSION_LESS 3.21)
    message (FATAL_ERROR "ASGarD AMD/ROCm GPU backend requires CMake version 3.21 or newer")
  endif ()

  # location of ROCm, in order of preference:
  # the CMake variable, the environment variable, the /opt/rocm folder
  if (ROCM_PATH)
    set (ASGARD_ROCM_PATH "${ROCM_PATH}")
  elseif (DEFINED ENV{ROCM_PATH})
    set (ASGARD_ROCM_PATH "$ENV{ROCM_PATH}")
  elseif (IS_DIRECTORY /opt/rocm)
    set (ASGARD_ROCM_PATH "/opt/rocm")
  endif ()
  list (APPEND CMAKE_PREFIX_PATH
        ${ASGARD_ROCM_PATH}/hip
        ${ASGARD_ROCM_PATH})

  enable_language (HIP)
  find_package (rocsolver REQUIRED)

  # the launch parameters must be set before "asgard_build_info.hpp" is configured
  set (ASGARD_NUM_GPU_THREADS "1024"
       CACHE STRING "Number of threads for GPU launch kernels")
  set (ASGARD_NUM_GPU_BLOCKS "2000"
       CACHE STRING "Number of blocks for GPU launch kernels")
endif ()

set(ASGARD_NUM_QUADRATURE "10"
    CACHE STRING "Minimum number of quadrature points to be used transformations.")

#-------------------------------------------------------------------------------
#  The asgard library target
#-------------------------------------------------------------------------------
# headers generated from templates are written to the build folder
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_build_info.hpp.in"
               "${CMAKE_CURRENT_BINARY_DIR}/asgard_build_info.hpp")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_cmakelog.hpp.in"
               "${CMAKE_CURRENT_BINARY_DIR}/asgard_cmakelog.hpp")

# the target is called libasgard, the file on disk is called (lib)asgard
add_library(libasgard)
set_property(TARGET libasgard PROPERTY OUTPUT_NAME "asgard")
set_property(TARGET libasgard PROPERTY SOVERSION ${asgard_VERSION_MAJOR})
set_property(TARGET libasgard PROPERTY VERSION ${PROJECT_VERSION})

# when HDF5 is built as an external project, it must be built before the library
if (ASGARD_BUILD_HDF5)
  add_dependencies(libasgard hdf5_external)
endif ()

# the library and everything that links to it use at least C++17
target_compile_features(libasgard PUBLIC cxx_std_17)

# List of the library components. A component "comp" may provide any of
#   src/asgard_comp.hpp        (header)
#   src/asgard_comp.cpp        (source)
#   src/asgard_comp_tests.cpp  (tests)
# all of those are optional, e.g., if header only or testing is done elsewhere.
# The order of the names is preserved when the list is processed.
set (asgard_components
     blas block_matrix coefficients_mats
     compute compute_cpu
     discretization domain function_defs grid_1d
     indexset interp
     kronmult kronmult_common kronmult_cpu
     legendre_matrices
     moment_manager momentset
     pde pde_functions program_options
     quadrature reconstruct refinement resource_groups
     small_mats solver
     term_build term_manager term_sources
     test_macros test_pdes
     time_advance time_data
     transformations tools wavelet_basis)

# the io component is present only when HighFive is enabled
if (ASGARD_USE_HIGHFIVE)
  list (APPEND asgard_components io)
endif ()

# attach the files of each component to the library
foreach (_asg_comp IN LISTS asgard_components)
  # the source file is optional, some components don't have .cpp files
  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_comp}.cpp)
    target_sources (libasgard PRIVATE
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_comp}.cpp>)
  endif ()
  # the header is optional too, it is precompiled only if ASGARD_USE_PCH is enabled
  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_comp}.hpp)
    target_precompile_headers (libasgard PUBLIC
      $<$<BOOL:${ASGARD_USE_PCH}>:$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_comp}.hpp>>)
  endif ()
endforeach ()

# the generated build-info header follows the same precompile rule
target_precompile_headers (libasgard PUBLIC
  $<$<BOOL:${ASGARD_USE_PCH}>:$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/asgard_build_info.hpp>>)

# dependencies, the optional ones are switched by generator expressions
# PUBLIC entries are passed to the targets that link to libasgard,
# the HighFive target is used only to build the library itself
target_link_libraries (libasgard
  PUBLIC
    asgard::LINALG
    $<$<BOOL:${MPI_CXX_FOUND}>:MPI::MPI_CXX>
    $<$<BOOL:${OpenMP_CXX_FOUND}>:OpenMP::OpenMP_CXX>
    $<$<BOOL:${ASGARD_USE_HIGHFIVE}>:asgard_hdf5>
    $<$<BOOL:${ASGARD_USE_CUDA}>:CUDA::cudart>
    $<$<BOOL:${ASGARD_USE_CUDA}>:CUDA::cublas>
    $<$<BOOL:${ASGARD_USE_CUDA}>:CUDA::cusolver>
    $<$<BOOL:${ASGARD_USE_ROCM}>:roc::rocsolver>
    $<$<BOOL:${ASGARD_USE_ROCM}>:hip::host>
  PRIVATE
    $<$<BOOL:${ASGARD_USE_HIGHFIVE}>:asgard_highfive>
)

# headers come from the source folder and from the build folder (generated headers)
target_include_directories (libasgard PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)

# CUDA backend: extra headers and sources, the .cpp files are compiled as CUDA
if (ASGARD_USE_CUDA)
  target_sources (libasgard PRIVATE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_compute_cuda.hpp>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_gpu_algorithms.hpp>)

  foreach (_asg_gpusrc gpu_algorithms gpu_tensors gpu_pde kronmult)
    target_sources (libasgard PRIVATE
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_gpusrc}_cuda.cpp>)
    set_source_files_properties (
      ${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_gpusrc}_cuda.cpp
      PROPERTIES LANGUAGE CUDA)
  endforeach ()

  target_compile_features (libasgard PUBLIC cuda_std_17)
endif ()

# ROCm backend: extra headers and sources, the .cpp files are compiled as HIP
if (ASGARD_USE_ROCM)
  target_sources (libasgard PRIVATE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_compute_rocm.hpp>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_gpu_algorithms.hpp>)

  foreach (_asg_gpusrc gpu_algorithms gpu_tensors gpu_pde kronmult)
    target_sources (libasgard PRIVATE
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_gpusrc}_rocm.cpp>)
    set_source_files_properties (
      ${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_gpusrc}_rocm.cpp
      PROPERTIES LANGUAGE HIP)
  endforeach ()

  target_compile_features (libasgard PUBLIC hip_std_17)
endif ()

#-------------------------------------------------------------------------------
#  Define the asgard executable targets.
#-------------------------------------------------------------------------------
# main executable, the target is asgard_exe but the binary is called "asgard"
add_executable(asgard_exe)
set_property(TARGET asgard_exe PROPERTY OUTPUT_NAME "asgard")
target_sources(asgard_exe PRIVATE
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_main.cpp>)
target_link_libraries(asgard_exe PRIVATE libasgard)

# one executable per PDE, built from src/pde/<name>.cpp
foreach (_asg_pdename IN LISTS _asgard_pdes)
  add_executable(${_asg_pdename}
                 "${CMAKE_CURRENT_SOURCE_DIR}/src/pde/${_asg_pdename}.cpp")
  target_link_libraries(${_asg_pdename} PUBLIC libasgard)

  # the documentation and gpu PDEs are installed, both the binary and the source,
  # the internal PDEs are not
  if (${_asg_pdename} IN_LIST _asgard_doc_pdes OR ${_asg_pdename} IN_LIST _asgard_gpu_pdes)
    install(TARGETS ${_asg_pdename}
            RUNTIME DESTINATION "share/asgard/pde")
    install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/pde/${_asg_pdename}.cpp"
            DESTINATION "share/asgard/examples")
  endif ()
endforeach ()

# __asgard_GPU_COMPONENT and __asgard_GPU_LANG are used to configure the
# examples CMakeLists; if the GPU is disabled, those are set to the CUDA values
# but will not be used since the CUDA component will not be found,
# with ROCm the values are switched to HIP
if (ASGARD_USE_ROCM)
  set (__asgard_GPU_COMPONENT "asgard_ROCM_FOUND")
  set (__asgard_GPU_LANG "HIP") # using HIP
else ()
  set (__asgard_GPU_COMPONENT "asgard_CUDA_FOUND")
  set (__asgard_GPU_LANG "CUDA")
endif ()

# with a GPU backend, the gpu PDE sources are compiled with the GPU language
if (ASGARD_USE_CUDA OR ASGARD_USE_ROCM)
  foreach (_asg_gpupde ${_asgard_gpu_pdes})
    set_source_files_properties (
      ${CMAKE_CURRENT_SOURCE_DIR}/src/pde/${_asg_gpupde}.cpp
      PROPERTIES LANGUAGE ${__asgard_GPU_LANG})
  endforeach ()
endif ()

#-------------------------------------------------------------------------------
# Documentation is optional and is processed in the doxygen sub-folder,
# this must come after the asgard target is defined
#-------------------------------------------------------------------------------
if (ASGARD_BUILD_DOCS)
  add_subdirectory (doxygen)
endif ()

###############################################################################
## Testing asgard
###############################################################################
# asgard_add_mpi_test(NAME <test-name> COMMAND <executable> RANKS <num-ranks>)
#
# Registers a test that launches <executable>, located in the current binary
# folder, with the "-test" flag under the MPI launcher using <num-ranks> ranks.
# The test reserves <num-ranks> processors and does not run concurrently with
# other tests. If OpenMP is enabled, the test uses a single OpenMP thread.
macro(asgard_add_mpi_test)
  cmake_parse_arguments (_asgard "" "NAME;COMMAND;RANKS" "" ${ARGN})

  add_test (${_asgard_NAME}
            ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${_asgard_RANKS}
            ${MPIEXEC_PREFLAGS}
            ${CMAKE_CURRENT_BINARY_DIR}/${_asgard_COMMAND} -test
            ${MPIEXEC_POSTFLAGS})
  set_tests_properties (${_asgard_NAME}
                        PROPERTIES
                        PROCESSORS ${_asgard_RANKS}
                        RUN_SERIAL TRUE)

  # mpi + openmp can yield too many heavy threads and cause a huge bottleneck in testing
  if (ASGARD_USE_OPENMP)
    set_tests_properties (${_asgard_NAME}
                          PROPERTIES ENVIRONMENT "OMP_NUM_THREADS=1")
  endif ()

  # a macro shares the scope of the caller, remove the parsed arguments
  unset (_asgard_COMMAND)
  unset (_asgard_RANKS)
  unset (_asgard_NAME)
endmacro()

if (ASGARD_BUILD_TESTS)
  enable_testing ()

  # add sandbox executable, i.e., an executable that is part of the build system
  # contain all the appropriate link flags and dependencies, but does nothing
  # other than play with some code
  add_executable(sandbox ./testing/sandbox.cpp)
  target_link_libraries (sandbox PUBLIC libasgard)

  if (ASGARD_USE_PCH)
    target_precompile_headers (sandbox REUSE_FROM libasgard)
  endif ()

  # first, add the unit and integration tests from the components
  foreach (component IN LISTS asgard_components)
      # some components don't have unit-tests, only end-to-end and integration
      # those are handled through PDEs and other components, can be skipped here
      if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${component}_tests.cpp)
          continue()
      endif()

      set(_asgname "asgard-${component}-tests")

      add_executable (${_asgname})
      target_sources (${_asgname} PRIVATE src/asgard_${component}_tests.cpp)

      target_include_directories (${_asgname} PRIVATE ${CMAKE_SOURCE_DIR}/testing)
      target_include_directories (${_asgname} PRIVATE ${CMAKE_BINARY_DIR})

      target_link_libraries(${_asgname} PUBLIC libasgard)

      if (ASGARD_BUILD_HDF5 OR ASGARD_BUILD_OPENBLAS)
          set_target_properties(${_asgname} PROPERTIES BUILD_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/")
      endif()

      if (ASGARD_USE_PCH)
          target_precompile_headers (${_asgname} REUSE_FROM libasgard)
      endif ()

      add_test (NAME ${_asgname}
                COMMAND ${_asgname}
                WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

      if (ASGARD_TEST_OMP_NUM_THREADS)
          set_tests_properties (${_asgname} PROPERTIES
              PROCESSORS ${ASGARD_TEST_OMP_NUM_THREADS}
              ENVIRONMENT OMP_NUM_THREADS=${ASGARD_TEST_OMP_NUM_THREADS})
      endif()

  endforeach()

  if (TARGET asgard-io-tests)
    target_link_libraries (asgard-io-tests PUBLIC asgard_highfive)
  endif()

  # the python test is on the border, uses full pde definitions
  # but focuses on the reconstruction capabilities rather than the pde
  if (ASGARD_USE_PYTHON)
    add_test(NAME asgard-pyreconstruct-test COMMAND Python::Interpreter "${CMAKE_CURRENT_BINARY_DIR}/pyasgard_test.py")
  endif()

  # with MPI enabled, not every combination of pde and number of ranks is tested:
  # some tests are naturally slower than others and the MPI-specific code is small,
  # so full MPI coverage can be achieved with a limited number of tests
  if (ASGARD_USE_MPI)
    # at least some of the tests must use a number of ranks equal to or larger than
    # mpi::bcast_threshold and mpi::reduce_threshold, which switches between
    # the two algorithms; the thresholds are currently 4 and 4

    # pdes tested with 2, 3 and 4 ranks
    set(asgard_pde_ranks_continuity     2 3 4)
    set(asgard_pde_ranks_elliptic       2 3 4)
    set(asgard_pde_ranks_bound_internal 2 3 4)
    set(asgard_pde_ranks_two_stream     2 3 4)

    # pdes tested with 2 and 4 ranks
    set(asgard_pde_ranks_sinwav 2 4)

    # pdes tested with 2 ranks only
    set(asgard_pde_ranks_diffusion           2)
    set(asgard_pde_ranks_spherical_diffusion 2)
    set(asgard_pde_ranks_relaxation_internal 2)
  endif()

  # smoke tests: the executable must not crash when printing the human-readable
  # help and version text; that output is used a lot, so issues are easy to catch
  foreach (_asg_switch help version)
    add_test (NAME asgard-${_asg_switch} COMMAND asgard --${_asg_switch})
  endforeach()

  # register one serial test per pde executable, followed by its MPI variants
  foreach (_asg_pde ${_asgard_pdes})
      # command-line arguments of the serial run
      #   continuity_2d : no arguments
      #   inputs_1d     : -if inputs_1d_1.txt
      #   anything else : -test
      if ("${_asg_pde}" STREQUAL "continuity_2d")
          set(_asg_pde_args)
      elseif ("${_asg_pde}" STREQUAL "inputs_1d")
          set(_asg_pde_args "-if" "inputs_1d_1.txt")
      else ()
          set(_asg_pde_args "-test")
      endif()

      set(_asg_serial_test "asgard-pde-${_asg_pde}-test")
      add_test (NAME ${_asg_serial_test}
                COMMAND ${_asg_pde} ${_asg_pde_args}
                WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

      # optionally fix the OpenMP thread count and the process slots seen by ctest
      if (ASGARD_TEST_OMP_NUM_THREADS)
          set_tests_properties (${_asg_serial_test} PROPERTIES
                                PROCESSORS ${ASGARD_TEST_OMP_NUM_THREADS}
                                ENVIRONMENT OMP_NUM_THREADS=${ASGARD_TEST_OMP_NUM_THREADS})
      endif()

      # the rank lists are defined only if using MPI, otherwise nothing is added here
      if (asgard_pde_ranks_${_asg_pde})
        foreach (_asg_nranks ${asgard_pde_ranks_${_asg_pde}})
          set(_asg_mpi_test "asgard-pde-${_asg_pde}-mpin${_asg_nranks}")
          asgard_add_mpi_test(NAME ${_asg_mpi_test}  COMMAND ${_asg_pde} RANKS ${_asg_nranks})
          # the MPI tests have their own, separate, OpenMP thread-count option
          if (ASGARD_TEST_MPI_OMP_NUM_THREADS)
            set_tests_properties (${_asg_mpi_test} PROPERTIES
                                  PROCESSORS ${_asg_nranks}
                                  ENVIRONMENT OMP_NUM_THREADS=${ASGARD_TEST_MPI_OMP_NUM_THREADS})
          endif()
        endforeach()
      endif()

  endforeach()

# assuming testing makes us into a stand-alone project, so be more verbose
# helps keep track of flags and options and whether things were enabled correctly
  # names of the variables to report; the conditional entries are appended
  # in the same order in which they are printed
  set(_asg_summary CMAKE_BUILD_TYPE
                   CMAKE_CXX_FLAGS
                   ASGARD_PRECISIONS
                   ASGARD_USE_OPENMP
                   ASGARD_USE_MPI
                   ASGARD_USE_CUDA
                   ASGARD_USE_ROCM
                   ASGARD_USE_PYTHON
                   ASGARD_USE_HIGHFIVE
                   ASGARD_USE_TIMER)
  if (ASGARD_USE_TIMER)
    list(APPEND _asg_summary ASGARD_USE_FLOPCOUNTER)
  endif()
  if (ASGARD_USE_CUDA)
    list(APPEND _asg_summary CMAKE_CUDA_COMPILER CMAKE_CUDA_FLAGS CMAKE_CUDA_ARCHITECTURES)
  endif()
  if (ASGARD_USE_ROCM)
    list(APPEND _asg_summary CMAKE_HIP_COMPILER CMAKE_HIP_FLAGS CMAKE_HIP_ARCHITECTURES)
  endif()
  if (ASGARD_USE_MPI AND (ASGARD_USE_CUDA OR ASGARD_USE_ROCM))
    list(APPEND _asg_summary ASGARD_GPUMPI_DIRECT)
  endif()

  # the report is framed by empty status lines, one "name=value" line per variable
  message(STATUS "")
  message(STATUS "ASGarD version: ${ASGARD_RELEASE_INFO}")
  foreach(_opt IN LISTS _asg_summary)
    message(STATUS "  ${_opt}=${${_opt}}")
  endforeach()
  message(STATUS "")

endif ()

#-------------------------------------------------------------------------------
# Installing the library as stand-alone
#-------------------------------------------------------------------------------
# the installed (exported) libasgard advertises the include folder under the install prefix
target_include_directories(libasgard
                           PUBLIC
                           $<INSTALL_INTERFACE:${__asgard_install_prefix}/${CMAKE_INSTALL_INCLUDEDIR}>)

# install and export whichever of the known targets exist in this configuration
foreach (_asg_tgt asgard_hdf5 libasgard asgard_exe)
  if (NOT TARGET ${_asg_tgt})
    continue()
  endif()
  install(TARGETS ${_asg_tgt} EXPORT "asgard-export")
endforeach()

# installing the headers, the GPU backends add their own components to the list
if (ASGARD_USE_CUDA)
  list(APPEND asgard_components compute_cuda gpu_algorithms_cuda)
endif()
if (ASGARD_USE_ROCM)
  list(APPEND asgard_components compute_rocm gpu_algorithms_rocm)
endif()

# only the components that come with a header in src/ contribute an installed file
foreach (_asg_name ${asgard_components})
  set(_asg_header "${CMAKE_CURRENT_SOURCE_DIR}/src/asgard_${_asg_name}.hpp")
  if (NOT EXISTS "${_asg_header}")
    continue()
  endif()
  install(FILES "${_asg_header}" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
endforeach()

# the two main headers from the source tree and the build-info header from the build tree
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/asgard.hpp"
              "${CMAKE_CURRENT_SOURCE_DIR}/src/asgard.h"
              "${CMAKE_CURRENT_BINARY_DIR}/asgard_build_info.hpp"
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

# configure the templates for the post-install examples and tests into the build tree,
# @ONLY restricts the substitution to @VAR@ so that ${VAR} in the templates is left intact
configure_file(examples/CMakeLists.txt ${CMAKE_CURRENT_BINARY_DIR}/examples/CMakeLists.txt @ONLY)
configure_file(testing/TestingCMakeLists.txt ${CMAKE_CURRENT_BINARY_DIR}/test/CMakeLists.txt @ONLY)
configure_file(testing/test_post_install.sh ${CMAKE_CURRENT_BINARY_DIR}/test_post_install.sh @ONLY)

# the test_install target runs the configured post-install script,
# VERBATIM makes the escaping of the command independent of the platform and generator
if (ASGARD_BUILD_TESTS)
    add_custom_target(test_install
                      COMMAND "${CMAKE_CURRENT_BINARY_DIR}/test_post_install.sh"
                      VERBATIM)
endif()

# environment script: configured into the build tree, then installed under share/asgard
configure_file(asgard-env.sh ${CMAKE_CURRENT_BINARY_DIR}/asgard-env.sh @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/asgard-env.sh" DESTINATION share/asgard/)

# examples: the .cpp sources are taken from the source tree,
# the CMakeLists.txt is the configured copy from the build tree
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/examples/"
        DESTINATION share/asgard/examples
        FILES_MATCHING
        PATTERN "*.cpp")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/examples/CMakeLists.txt"
        DESTINATION share/asgard/examples)

# the configured CMakeLists.txt for the post-install tests
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/test/CMakeLists.txt"
        DESTINATION share/asgard/testing)

# folder for the CMake package files, relative to the install prefix
# PROJECT_NAME (this project) is used in place of CMAKE_PROJECT_NAME (the top-level project),
# the two are identical for a stand-alone build but differ when this project is a subproject
set(_asg_cmake_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")

# exported targets, written to <project>-targets.cmake
install(EXPORT "asgard-export"
        DESTINATION "${_asg_cmake_dir}"
        FILE "${PROJECT_NAME}-targets.cmake")

# INSTALL_DESTINATION only tells the helper where the config file will end up,
# which is used to compute relative paths inside the generated file;
# it does not install anything, hence the explicit install(FILES ...) below
configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/asgard-config.cmake"
                              "${CMAKE_CURRENT_BINARY_DIR}/asgard-config.cmake"
                              INSTALL_DESTINATION "${_asg_cmake_dir}/")
# no explicit VERSION is given, the helper then uses the version of the project
write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/asgard-config-version.cmake"
                                 COMPATIBILITY AnyNewerVersion)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/asgard-config.cmake"
              "${CMAKE_CURRENT_BINARY_DIR}/asgard-config-version.cmake"
        DESTINATION "${_asg_cmake_dir}")
