amrex/doxygen/AMReX__GpuLaunchFunctsSIMD_8H_source.html

#ifndef AMREX_GPU_LAUNCH_FUNCTS_SIMD_H_

#define AMREX_GPU_LAUNCH_FUNCTS_SIMD_H_


#include <AMReX_Config.H>

#include <AMReX_Extension.H>

#include <AMReX_SIMD.H>


#include <type_traits>


namespace amrex

{


/**
 * \brief Index argument handed to the callable by ParallelForSIMD.
 *
 * The chunk width is part of the type, so a callable can tell a
 * WIDTH-wide call from a single-element call at compile time.
 *
 * \tparam WIDTH number of consecutive elements covered, starting at \c index
 * \tparam N     type of the stored index (defaults to int)
 */
template <int WIDTH, typename N = int>
struct SIMDindex
{
    //! First element covered by this call.
    N index = 0;

    //! Number of elements covered by this call (the WIDTH template argument).
    static constexpr int width = WIDTH;
};


/**
 * \brief Serial loop over [0, n) that hands the callable WIDTH indices at a time.
 *
 * The range is processed in two phases:
 *  - full chunks: f(SIMDindex<WIDTH, N>{i}) for i = 0, WIDTH, 2*WIDTH, ...
 *    as long as a whole chunk of WIDTH elements still fits below n;
 *  - remainder:   f(SIMDindex<1, N>{i}) for every element that is left.
 *
 * The callable must therefore accept both SIMDindex<WIDTH, N> and
 * SIMDindex<1, N> (e.g. a generic lambda). Nothing is called if n <= 0.
 *
 * \tparam WIDTH chunk width; must be positive
 * \tparam N     integral type of the loop bound and of the index passed to f
 * \tparam L     type of the callable
 * \tparam M     SFINAE guard: the overload only exists for integral N
 *
 * \param n one past the last index to visit
 * \param f callable invoked once per chunk / remainder element
 */
template <int WIDTH, typename N, typename L, typename M=std::enable_if_t<std::is_integral_v<N>> >
AMREX_ATTRIBUTE_FLATTEN_FOR
void ParallelForSIMD (N n, L const& f) noexcept
{
    // A non-positive width would never advance the chunk loop.
    static_assert(WIDTH > 0, "ParallelForSIMD: WIDTH must be positive");

    N i = 0;

    // vectorize full lanes
    // The bound is written as `n - i >= WIDTH` instead of `i + WIDTH <= n`:
    // the sum can overflow (signed N) or wrap around (unsigned N) when n is
    // within WIDTH of the largest value of N, whereas the difference cannot,
    // because 0 <= i <= n holds whenever it is evaluated with n >= 0.
    for (; n - i >= WIDTH; i += WIDTH) {
        f(SIMDindex<WIDTH, N>{i});
    }

    // scalar handling of the remainder
    // note: we could make the remainder calls faster, by repeatedly
    //       decreasing the SIMD width by 2 until we reach 1
    for (; i < n; ++i) {
        f(SIMDindex<1, N>{i});
    }
}


/**
 * \brief Loop over [0, n), choosing the SIMD or the plain launcher from the type T.
 *
 * If AMREX_USE_SIMD is defined and amrex::simd::is_vectorized<T> is true,
 * the call is forwarded to ParallelForSIMD<T::simd_width>(n, f). In every
 * other case it is forwarded to amrex::ParallelFor(n, f). The choice is made
 * at compile time; the branch that is not selected is discarded.
 *
 * \tparam T type that selects the path; it cannot be deduced and has to be
 *           given explicitly by the caller
 * \tparam N integral type of the loop bound
 * \tparam L type of the callable
 * \tparam M SFINAE guard: the overload only exists for integral N
 *
 * \param n loop bound passed on to the selected launcher
 * \param f callable passed on (perfectly forwarded) to the selected launcher
 */
template <typename T, typename N, typename L, typename M=std::enable_if_t<std::is_integral_v<N>> >
AMREX_ATTRIBUTE_FLATTEN_FOR
void ParallelForSIMD (N n, L && f) noexcept
{
    // Without AMREX_USE_SIMD the vectorization trait is never consulted.
#ifdef AMREX_USE_SIMD
    constexpr bool take_simd_path = amrex::simd::is_vectorized<T>;
#else
    constexpr bool take_simd_path = false;
#endif

    if constexpr (take_simd_path) {
        // chunked loop, the chunk width is taken from T
        amrex::ParallelForSIMD<T::simd_width>(n, std::forward<L>(f));
    } else {
        // regular launcher
        amrex::ParallelFor(n, std::forward<L>(f));
    }
}


} // namespace amrex


#endif

AMReX_Extension.H

AMREX_ATTRIBUTE_FLATTEN_FOR
#define AMREX_ATTRIBUTE_FLATTEN_FOR
Definition AMReX_Extension.H:151

AMReX_SIMD.H

amrex
Definition AMReX_Amr.cpp:49

amrex::ParallelFor
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193

amrex::ParallelForSIMD
void ParallelForSIMD(N n, L const &f) noexcept
Definition AMReX_GpuLaunchFunctsSIMD.H:42

amrex::SIMDindex
Definition AMReX_GpuLaunchFunctsSIMD.H:24

amrex::SIMDindex::width
static constexpr int width
Definition AMReX_GpuLaunchFunctsSIMD.H:26

amrex::SIMDindex::index
N index
Definition AMReX_GpuLaunchFunctsSIMD.H:29