Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_CTOParallelForImpl.H
Go to the documentation of this file.
1#ifndef AMREX_CTO_PARALLEL_FOR_H_
2#define AMREX_CTO_PARALLEL_FOR_H_
3
4#include <AMReX_BLassert.H>
5#include <AMReX_Box.H>
6#include <AMReX_TypeList.H>
7
8#include <array>
9#include <concepts>
10#include <type_traits>
11#include <utility>
12
13/* This header is not meant to be included directly by users. It is
14 * included by AMReX_GpuLaunch.H, which has already included the headers
15 * needed here. */
16
17/* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */
18
19namespace amrex {
20
21template <int... ctr>
23 // TypeList is defined in AMReX_TypeList.H
25};
26
28namespace detail
29{
30 template<class F, int... ctr>
31 struct CTOWrapper {
32 F f;
33
34 template<class... Args>
36 auto operator() (Args... args) const noexcept
37 -> decltype(f(args..., std::integral_constant<int, ctr>{}...)) {
38 return f(args..., std::integral_constant<int, ctr>{}...);
39 }
40
42 static constexpr
43 std::array<int, sizeof...(ctr)> GetOptions () noexcept {
44 return {ctr...};
45 }
46 };
47
48 template <class L, typename... As, class... Fs>
49 bool
50 AnyCTO_helper2 (const L& l, TypeList<As...>,
51 std::array<int,sizeof...(As)> const& runtime_options, const Fs&...cto_functs)
52 {
53 if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
54 if constexpr (sizeof...(cto_functs) != 0) {
55 // Apply the CTOWrapper to each function that was given in cto_functs
56 // and call the CPU function l with all of them
57 l(CTOWrapper<Fs, As::value...>{cto_functs}...);
58 } else {
59 // No functions in cto_functs so we call l directly with the compile time arguments
60 l(As{}...);
61 }
62 return true;
63 } else {
64 return false;
65 }
66 }
67
68 template <class L, typename... PPs, typename RO, class...Fs>
69 void
70 AnyCTO_helper1 (const L& l, TypeList<PPs...>,
71 RO const& runtime_options, const Fs&...cto_functs)
72 {
73 bool found_option = (false || ... ||
74 AnyCTO_helper2(l, PPs{}, runtime_options, cto_functs...));
75 amrex::ignore_unused(found_option);
76 AMREX_ASSERT(found_option);
77 }
78
79 template <int Begin, int End, int... I>
80 constexpr auto MakeCTOImpl(std::integer_sequence<int, I...>)
81 -> CompileTimeOptions<Begin + I...>;
82}
84
85template <int Begin, int End>
86using CTOSeq = decltype(detail::MakeCTOImpl<Begin, End>(
87 std::make_integer_sequence<int, End - Begin>{}));
88
190template <class L, class... Fs, typename... CTOs>
191void AnyCTO ([[maybe_unused]] TypeList<CTOs...> list_of_compile_time_options,
192 std::array<int,sizeof...(CTOs)> const& runtime_options,
193 L&& l, Fs&&...cto_functs)
194{
195 detail::AnyCTO_helper1(std::forward<L>(l),
196 CartesianProduct(typename CTOs::list_type{}...),
197 runtime_options,
198 std::forward<Fs>(cto_functs)...);
199}
200
201template <int MT, std::integral T, class F, typename... CTOs>
203 std::array<int,sizeof...(CTOs)> const& runtime_options,
204 T N, F&& f)
205{
206 AnyCTO(ctos, runtime_options,
207 [&](auto cto_func){
208 ParallelFor<MT>(N, cto_func);
209 },
210 std::forward<F>(f)
211 );
212}
213
214template <int MT, class F, int dim, typename... CTOs>
216 std::array<int,sizeof...(CTOs)> const& runtime_options,
217 BoxND<dim> const& box, F&& f)
218{
219 AnyCTO(ctos, runtime_options,
220 [&](auto cto_func){
221 ParallelFor<MT>(box, cto_func);
222 },
223 std::forward<F>(f)
224 );
225}
226
227template <int MT, std::integral T, class F, int dim, typename... CTOs>
229 std::array<int,sizeof...(CTOs)> const& runtime_options,
230 BoxND<dim> const& box, T ncomp, F&& f)
231{
232 AnyCTO(ctos, runtime_options,
233 [&](auto cto_func){
234 ParallelFor<MT>(box, ncomp, cto_func);
235 },
236 std::forward<F>(f)
237 );
238}
239
284template <std::integral T, class F, typename... CTOs>
286 std::array<int,sizeof...(CTOs)> const& option,
287 T N, F&& f)
288{
289 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, N, std::forward<F>(f));
290}
291
337template <class F, int dim, typename... CTOs>
339 std::array<int,sizeof...(CTOs)> const& option,
340 BoxND<dim> const& box, F&& f)
341{
342 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, std::forward<F>(f));
343}
344
391template <std::integral T, class F, int dim, typename... CTOs>
393 std::array<int,sizeof...(CTOs)> const& option,
394 BoxND<dim> const& box, T ncomp, F&& f)
395{
396 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, ncomp, std::forward<F>(f));
397}
398
399}
400
401#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
A Rectangular Domain on an Integer Lattice.
Definition AMReX_Box.H:49
Definition AMReX_Amr.cpp:50
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void AnyCTO(TypeList< CTOs... > list_of_compile_time_options, std::array< int, sizeof...(CTOs)> const &runtime_options, L &&l, Fs &&...cto_functs)
Compile time optimization of kernels with run time options.
Definition AMReX_CTOParallelForImpl.H:191
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
constexpr auto CartesianProduct(Ls...)
Cartesian Product of TypeLists.
Definition AMReX_TypeList.H:155
const int[]
Definition AMReX_BLProfiler.cpp:1664
decltype(detail::MakeCTOImpl< Begin, End >(std::make_integer_sequence< int, End - Begin >{})) CTOSeq
Definition AMReX_CTOParallelForImpl.H:87
Definition AMReX_CTOParallelForImpl.H:22
Struct for holding types.
Definition AMReX_TypeList.H:13