Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_GpuLaunchFunctsSIMD.H
Go to the documentation of this file.
1#ifndef AMREX_GPU_LAUNCH_FUNCTS_SIMD_H_
2#define AMREX_GPU_LAUNCH_FUNCTS_SIMD_H_
3
4#include <AMReX_Config.H>
5#include <AMReX_Extension.H>
6#include <AMReX_SIMD.H>
7
8#include <type_traits>
9
10
11namespace amrex
12{
13
/**
 * Index descriptor passed to the callback of ParallelForSIMD.
 *
 * The number of elements covered is part of the type (`width`); the
 * position is a runtime value (`index`). ParallelForSIMD in this file
 * constructs it as `SIMDindex<WIDTH, N>{i}` with `i` being its loop counter.
 *
 * NOTE(review): the `struct SIMDindex` line itself is absent from the
 * extracted listing; the name is taken from its use in ParallelForSIMD.
 * Confirm against the upstream header.
 *
 * @tparam WIDTH number of elements covered by one callback invocation
 * @tparam N     type of the index (defaults to int)
 */
template<int WIDTH, class N=int>
struct SIMDindex
{
    /** Width in elements, fixed at compile time. */
    static constexpr int width = WIDTH;

    /** Index value carried to the callback; 0 unless initialized otherwise. */
    N index = 0;
};
31
40template <int WIDTH, typename N, typename L, typename M=std::enable_if_t<std::is_integral_v<N>> >
42void ParallelForSIMD (N n, L const& f) noexcept
43{
44 N i = 0;
45 // vectorize full lanes
46 for (; i + WIDTH <= n; i+=WIDTH) {
48 }
49 // scalar handling of the remainder
50 // note: we could make the remainder calls faster, by repeatedly
51 // decreasing the SIMD width by 2 until we reach 1
52 for (; i < n; ++i) {
53 f(SIMDindex<1, N>{i});
54 }
55}
56
65template <typename T, typename N, typename L, typename M=std::enable_if_t<std::is_integral_v<N>> >
67void ParallelForSIMD (N n, L && f) noexcept
68{
69#ifdef AMREX_USE_SIMD
70 if constexpr (amrex::simd::is_vectorized<T>) {
71 amrex::ParallelForSIMD<T::simd_width>(n, std::forward<L>(f));
72 } else
73#endif
74 {
75 amrex::ParallelFor(n, std::forward<L>(f));
76 }
77}
78
79} // namespace amrex
80
81#endif
#define AMREX_ATTRIBUTE_FLATTEN_FOR
Definition AMReX_Extension.H:151
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
void ParallelForSIMD(N n, L const &f) noexcept
Definition AMReX_GpuLaunchFunctsSIMD.H:42
SIMDindex
Definition AMReX_GpuLaunchFunctsSIMD.H:24
static constexpr int width
Definition AMReX_GpuLaunchFunctsSIMD.H:26
N index
Definition AMReX_GpuLaunchFunctsSIMD.H:29