Block-Structured AMR Software Framework
 
Loading...
Searching...
No Matches
AMReX_CTOParallelForImpl.H
Go to the documentation of this file.
1#ifndef AMREX_CTO_PARALLEL_FOR_H_
2#define AMREX_CTO_PARALLEL_FOR_H_
3
4#include <AMReX_BLassert.H>
5#include <AMReX_Box.H>
6#include <AMReX_TypeList.H>
7
8#include <array>
9#include <type_traits>
10
/* This header is not meant to be included directly by users. It is included
 * by AMReX_GpuLaunch.H, which has already included the headers needed
 * here. */
14
15/* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */
16
17namespace amrex {
18
19template <int... ctr>
21 // TypeList is defined in AMReX_TypeList.H
23};
24
26namespace detail
27{
/**
 * \brief Callable adaptor that appends compile-time options to a call.
 *
 * The wrapper stores a copy of the callable \c f. Calling the wrapper passes
 * the run-time arguments on to \c f and appends one default-constructed
 * \c std::integral_constant<int, ctr> for every value in the pack \c ctr, so
 * \c f can read each option as a constant expression.
 *
 * NOTE(review): the extracted listing skips original header lines 33 and 39,
 * which sit directly before operator() and GetOptions(). The file's symbol
 * reference mentions AMREX_GPU_HOST_DEVICE and AMREX_FORCE_INLINE, so those
 * qualifiers may have been dropped there -- confirm against the real header.
 *
 * \tparam F    type of the wrapped callable
 * \tparam ctr  compile-time option values, in the order they are appended
 */
template<class F, int... ctr>
struct CTOWrapper {
    F f; //!< the wrapped callable

    /**
     * \brief Call \c f with \c args followed by the compile-time options.
     *
     * The trailing return type makes this overload participate only when
     * \c f is callable with that argument list.
     *
     * \param args run-time arguments, taken by value and passed on to \c f
     * \return whatever \c f returns
     */
    template<class... Args>
    auto operator() (Args... args) const noexcept
        -> decltype(f(args..., std::integral_constant<int, ctr>{}...)) {
        return f(args..., std::integral_constant<int, ctr>{}...);
    }

    /**
     * \brief The option values this wrapper was instantiated with.
     * \return an array holding the values of \c ctr in pack order
     */
    static constexpr
    std::array<int, sizeof...(ctr)> GetOptions () noexcept {
        return {ctr...};
    }
};
45
46 template <class L, typename... As, class... Fs>
47 bool
48 AnyCTO_helper2 (const L& l, TypeList<As...>,
49 std::array<int,sizeof...(As)> const& runtime_options, const Fs&...cto_functs)
50 {
51 if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
52 if constexpr (sizeof...(cto_functs) != 0) {
53 // Apply the CTOWrapper to each function that was given in cto_functs
54 // and call the CPU function l with all of them
55 l(CTOWrapper<Fs, As::value...>{cto_functs}...);
56 } else {
57 // No functions in cto_functs so we call l directly with the compile time arguments
58 l(As{}...);
59 }
60 return true;
61 } else {
62 return false;
63 }
64 }
65
66 template <class L, typename... PPs, typename RO, class...Fs>
67 void
68 AnyCTO_helper1 (const L& l, TypeList<PPs...>,
69 RO const& runtime_options, const Fs&...cto_functs)
70 {
71 bool found_option = (false || ... ||
72 AnyCTO_helper2(l, PPs{}, runtime_options, cto_functs...));
73 amrex::ignore_unused(found_option);
74 AMREX_ASSERT(found_option);
75 }
76}
78
180template <class L, class... Fs, typename... CTOs>
181void AnyCTO ([[maybe_unused]] TypeList<CTOs...> list_of_compile_time_options,
182 std::array<int,sizeof...(CTOs)> const& runtime_options,
183 L&& l, Fs&&...cto_functs)
184{
185 detail::AnyCTO_helper1(std::forward<L>(l),
186 CartesianProduct(typename CTOs::list_type{}...),
187 runtime_options,
188 std::forward<Fs>(cto_functs)...);
189}
190
191template <int MT, typename T, class F, typename... CTOs>
192std::enable_if_t<std::is_integral_v<T>>
194 std::array<int,sizeof...(CTOs)> const& runtime_options,
195 T N, F&& f)
196{
197 AnyCTO(ctos, runtime_options,
198 [&](auto cto_func){
199 ParallelFor<MT>(N, cto_func);
200 },
201 std::forward<F>(f)
202 );
203}
204
205template <int MT, class F, int dim, typename... CTOs>
207 std::array<int,sizeof...(CTOs)> const& runtime_options,
208 BoxND<dim> const& box, F&& f)
209{
210 AnyCTO(ctos, runtime_options,
211 [&](auto cto_func){
212 ParallelFor<MT>(box, cto_func);
213 },
214 std::forward<F>(f)
215 );
216}
217
218template <int MT, typename T, class F, int dim, typename... CTOs>
219std::enable_if_t<std::is_integral_v<T>>
221 std::array<int,sizeof...(CTOs)> const& runtime_options,
222 BoxND<dim> const& box, T ncomp, F&& f)
223{
224 AnyCTO(ctos, runtime_options,
225 [&](auto cto_func){
226 ParallelFor<MT>(box, ncomp, cto_func);
227 },
228 std::forward<F>(f)
229 );
230}
231
276template <typename T, class F, typename... CTOs>
277std::enable_if_t<std::is_integral_v<T>>
279 std::array<int,sizeof...(CTOs)> const& option,
280 T N, F&& f)
281{
282 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, N, std::forward<F>(f));
283}
284
330template <class F, int dim, typename... CTOs>
332 std::array<int,sizeof...(CTOs)> const& option,
333 BoxND<dim> const& box, F&& f)
334{
335 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, std::forward<F>(f));
336}
337
384template <typename T, class F, int dim, typename... CTOs>
385std::enable_if_t<std::is_integral_v<T>>
387 std::array<int,sizeof...(CTOs)> const& option,
388 BoxND<dim> const& box, T ncomp, F&& f)
389{
390 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, ncomp, std::forward<F>(f));
391}
392
393}
394
395#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
A Rectangular Domain on an Integer Lattice.
Definition AMReX_Box.H:49
Definition AMReX_Amr.cpp:49
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:138
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
void AnyCTO(TypeList< CTOs... > list_of_compile_time_options, std::array< int, sizeof...(CTOs)> const &runtime_options, L &&l, Fs &&...cto_functs)
Compile time optimization of kernels with run time options.
Definition AMReX_CTOParallelForImpl.H:181
constexpr auto CartesianProduct(Ls...)
Cartesian Product of TypeLists.
Definition AMReX_TypeList.H:154
Definition AMReX_CTOParallelForImpl.H:20
Struct for holding types.
Definition AMReX_TypeList.H:12