Block-Structured AMR Software Framework
AMReX_CTOParallelForImpl.H
Go to the documentation of this file.
1 #ifndef AMREX_CTO_PARALLEL_FOR_H_
2 #define AMREX_CTO_PARALLEL_FOR_H_
3 
4 #include <AMReX_BLassert.H>
5 #include <AMReX_Box.H>
6 #include <AMReX_TypeList.H>
7 
8 #include <array>
9 #include <type_traits>
10 
11 /* This header is not for the users to include directly. It's meant to be
12  * included in AMReX_GpuLaunch.H, which has included the headers needed
13  * here. */
14 
15 /* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */
16 
17 namespace amrex {
18 
19 template <int... ctr>
21  // TypeList is defined in AMReX_TypeList.H
23 };
24 
25 #if (__cplusplus >= 201703L)
26 
27 namespace detail
28 {
/**
 * Callable adaptor that appends compile-time integer options to a call.
 *
 * Invoking the wrapper with `args...` calls the stored callable as
 * `f(args..., std::integral_constant<int, ctr>{}...)`, i.e. every value in
 * `ctr...` is passed as a trailing tag argument whose type carries the value.
 *
 * NOTE(review): the source listing skips one line directly above each member
 * function below; any qualifiers that lived on those lines (for example GPU
 * host/device or force-inline macros) are not visible here -- confirm against
 * upstream before relying on this in device code.
 */
template<class F, int... ctr>
struct CTOWrapper {
    // Wrapped callable. Kept as a public data member so the wrapper can be
    // aggregate-initialized as CTOWrapper<F, ...>{callable}.
    F f;

    // Forward the runtime arguments and append one integral_constant per
    // option. The trailing return type keeps the call expression in the
    // signature, so the operator only participates when `f` accepts it.
    template<class... Args>
    auto operator() (Args... args) const noexcept
        -> decltype(f(args..., std::integral_constant<int, ctr>()...))
    {
        return f(args..., std::integral_constant<int, ctr>()...);
    }

    // The option values baked into this wrapper, in declaration order.
    static constexpr
    std::array<int, sizeof...(ctr)> GetOptions () noexcept
    {
        return std::array<int, sizeof...(ctr)>{ctr...};
    }
};
46 
47  template <class L, typename... As, class... Fs>
48  bool
49  AnyCTO_helper2 (const L& l, TypeList<As...>,
50  std::array<int,sizeof...(As)> const& runtime_options, const Fs&...cto_functs)
51  {
52  if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
53  if constexpr (sizeof...(cto_functs) != 0) {
54  // Apply the CTOWrapper to each function that was given in cto_functs
55  // and call the CPU function l with all of them
56  l(CTOWrapper<Fs, As::value...>{cto_functs}...);
57  } else {
58  // No functions in cto_functs so we call l directly with the compile time arguments
59  l(As{}...);
60  }
61  return true;
62  } else {
63  return false;
64  }
65  }
66 
67  template <class L, typename... PPs, typename RO, class...Fs>
68  void
69  AnyCTO_helper1 (const L& l, TypeList<PPs...>,
70  RO const& runtime_options, const Fs&...cto_functs)
71  {
72  bool found_option = (false || ... ||
73  AnyCTO_helper2(l, PPs{}, runtime_options, cto_functs...));
74  amrex::ignore_unused(found_option);
75  AMREX_ASSERT(found_option);
76  }
77 }
78 
79 #endif
80 
182 template <class L, class... Fs, typename... CTOs>
183 void AnyCTO ([[maybe_unused]] TypeList<CTOs...> list_of_compile_time_options,
184  std::array<int,sizeof...(CTOs)> const& runtime_options,
185  L&& l, Fs&&...cto_functs)
186 {
187 #if (__cplusplus >= 201703L)
188  detail::AnyCTO_helper1(std::forward<L>(l),
189  CartesianProduct(typename CTOs::list_type{}...),
190  runtime_options,
191  std::forward<Fs>(cto_functs)...);
192 #else
193  amrex::ignore_unused(runtime_options, l, f);
194  static_assert(std::is_integral<F>::value, "This requires C++17");
195 #endif
196 }
197 
198 template <int MT, typename T, class F, typename... CTOs>
199 std::enable_if_t<std::is_integral_v<T>>
201  std::array<int,sizeof...(CTOs)> const& runtime_options,
202  T N, F&& f)
203 {
204  AnyCTO(ctos, runtime_options,
205  [&](auto cto_func){
206  ParallelFor<MT>(N, cto_func);
207  },
208  std::forward<F>(f)
209  );
210 }
211 
212 template <int MT, class F, int dim, typename... CTOs>
214  std::array<int,sizeof...(CTOs)> const& runtime_options,
215  BoxND<dim> const& box, F&& f)
216 {
217  AnyCTO(ctos, runtime_options,
218  [&](auto cto_func){
219  ParallelFor<MT>(box, cto_func);
220  },
221  std::forward<F>(f)
222  );
223 }
224 
225 template <int MT, typename T, class F, int dim, typename... CTOs>
226 std::enable_if_t<std::is_integral_v<T>>
228  std::array<int,sizeof...(CTOs)> const& runtime_options,
229  BoxND<dim> const& box, T ncomp, F&& f)
230 {
231  AnyCTO(ctos, runtime_options,
232  [&](auto cto_func){
233  ParallelFor<MT>(box, ncomp, cto_func);
234  },
235  std::forward<F>(f)
236  );
237 }
238 
283 template <typename T, class F, typename... CTOs>
284 std::enable_if_t<std::is_integral_v<T>>
286  std::array<int,sizeof...(CTOs)> const& option,
287  T N, F&& f)
288 {
289  ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, N, std::forward<F>(f));
290 }
291 
337 template <class F, int dim, typename... CTOs>
339  std::array<int,sizeof...(CTOs)> const& option,
340  BoxND<dim> const& box, F&& f)
341 {
342  ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, std::forward<F>(f));
343 }
344 
391 template <typename T, class F, int dim, typename... CTOs>
392 std::enable_if_t<std::is_integral_v<T>>
394  std::array<int,sizeof...(CTOs)> const& option,
395  BoxND<dim> const& box, T ncomp, F&& f)
396 {
397  ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, ncomp, std::forward<F>(f));
398 }
399 
400 }
401 
402 #endif
#define AMREX_ASSERT(EX)
Definition: AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition: AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition: AMReX_GpuQualifiers.H:20
A Rectangular Domain on an Integer Lattice.
Definition: AMReX_Box.H:43
static int f(amrex::Real t, N_Vector y_data, N_Vector y_rhs, void *user_data)
Definition: AMReX_SundialsIntegrator.H:44
Definition: AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: AMReX_CTOParallelForImpl.H:200
void AnyCTO([[maybe_unused]] TypeList< CTOs... > list_of_compile_time_options, std::array< int, sizeof...(CTOs)> const &runtime_options, L &&l, Fs &&...cto_functs)
Compile time optimization of kernels with run time options.
Definition: AMReX_CTOParallelForImpl.H:183
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition: AMReX.H:111
constexpr auto CartesianProduct(Ls...)
Cartesian Product of TypeLists.
Definition: AMReX_TypeList.H:150
const int[]
Definition: AMReX_BLProfiler.cpp:1664
Definition: AMReX_FabArrayCommI.H:896
Definition: AMReX_CTOParallelForImpl.H:20
Struct for holding types.
Definition: AMReX_TypeList.H:12