Block-Structured AMR Software Framework
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
AMReX_CTOParallelForImpl.H
Go to the documentation of this file.
1#ifndef AMREX_CTO_PARALLEL_FOR_H_
2#define AMREX_CTO_PARALLEL_FOR_H_
3
4#include <AMReX_BLassert.H>
5#include <AMReX_Box.H>
6#include <AMReX_TypeList.H>
7
8#include <array>
9#include <type_traits>
10
11/* This header is not meant to be included directly by users. It is meant
12 * to be included from AMReX_GpuLaunch.H, which has already included the
13 * headers needed here. */
14
15/* Thanks to Maikel Nadolski and Alex Sinn for the techniques used here! */
16
17namespace amrex {
18
/* Class template parameterized on a pack of int values `ctr`.
 * NOTE(review): listing lines 20 and 22 (the class header and its member)
 * are absent from this listing. AnyCTO below reads `CTOs::list_type` from
 * each of its option types, so presumably that alias is declared here in
 * terms of TypeList -- confirm against the real header. */
19template <int... ctr>
21 // TypeList is defined in AMReX_TypeList.H
23};
24
25namespace detail
26{
/* Wraps a callable and appends compile-time int constants to every call.
 *
 * F   : type of the wrapped callable (stored as member `f`; per the index
 *       at the end of this page it is declared on listing line 29, which
 *       is not shown in this listing).
 * ctr : pack of int values; each one is passed to `f` as a trailing
 *       std::integral_constant<int, ctr> argument.
 *
 * The index at the end of this page lists both member functions with the
 * AMREX_GPU_HOST_DEVICE and AMREX_FORCE_INLINE qualifiers; those live on
 * listing lines 32 and 38, which are also not shown here. */
27 template<class F, int... ctr>
28 struct CTOWrapper {
30
// Call operator: takes the arguments by value and invokes
// f(args..., integral_constant<int, ctr>{}...). The trailing return type
// makes the result type (and the validity of the call) follow `f`.
31 template<class... Args>
33 auto operator() (Args... args) const noexcept
34 -> decltype(f(args..., std::integral_constant<int, ctr>{}...)) {
35 return f(args..., std::integral_constant<int, ctr>{}...);
36 }
37
// Returns the values of the `ctr` pack, in pack order, as a std::array<int>.
39 static constexpr
40 std::array<int, sizeof...(ctr)> GetOptions () noexcept {
41 return {ctr...};
42 }
43 };
44
/* Tries a single combination of compile-time options `As...`.
 *
 * Listing line 47 (function name and first parameters) is not shown; the
 * index at the end of this page gives the full signature as
 *   bool AnyCTO_helper2(const L& l, TypeList<As...>,
 *                       std::array<int,sizeof...(As)> const& runtime_options,
 *                       const Fs&... cto_functs)
 *
 * If `runtime_options` equals {As::value...} element for element, `l` is
 * invoked (see the two branches below) and true is returned. Otherwise
 * nothing is invoked and false is returned. */
45 template <class L, typename... As, class... Fs>
46 bool
48 std::array<int,sizeof...(As)> const& runtime_options, const Fs&...cto_functs)
49 {
50 if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
51 if constexpr (sizeof...(cto_functs) != 0) {
52 // Apply the CTOWrapper to each function that was given in cto_functs
53 // and call the CPU function l with all of them
54 l(CTOWrapper<Fs, As::value...>{cto_functs}...);
55 } else {
56 // No functions in cto_functs so we call l directly with the compile time arguments
57 l(As{}...);
58 }
59 return true;
60 } else {
61 return false;
62 }
63 }
64
/* Tries every option combination in `PPs...` until one matches.
 *
 * Listing line 67 (function name and first parameters) is not shown; the
 * index at the end of this page gives the full signature as
 *   void AnyCTO_helper1(const L& l, TypeList<PPs...>,
 *                       RO const& runtime_options, const Fs&... cto_functs)
 *
 * Each element of PPs is default-constructed and handed to AnyCTO_helper2.
 * The calls are combined with a fold over `||`, so evaluation stops at the
 * first combination that reports a match. If no combination matches, `l`
 * is never invoked and the only diagnostic is the assertion below. */
65 template <class L, typename... PPs, typename RO, class...Fs>
66 void
68 RO const& runtime_options, const Fs&...cto_functs)
69 {
70 bool found_option = (false || ... ||
71 AnyCTO_helper2(l, PPs{}, runtime_options, cto_functs...));
// AMREX_ASSERT may expand to nothing (the index at the end of this page
// shows an empty definition), so mark the variable as used to avoid an
// unused-variable warning in that configuration.
72 amrex::ignore_unused(found_option);
73 AMREX_ASSERT(found_option);
74 }
75}
76
/* Compile time optimization of kernels with run time options.
 *
 * Forms the Cartesian product of the `list_type` of every type in CTOs and
 * passes it, with the run time options, to detail::AnyCTO_helper1, which
 * invokes `l` for the combination whose values equal `runtime_options`.
 *
 * list_of_compile_time_options : used only to deduce CTOs; its value is not
 *                                read (hence [[maybe_unused]]).
 * runtime_options              : one int per type in CTOs.
 * l                            : callable invoked for the matching
 *                                combination. It receives the cto_functs
 *                                wrapped in detail::CTOWrapper, or, when no
 *                                cto_functs are given, the matching option
 *                                objects themselves.
 * cto_functs                   : zero or more callables to be wrapped.
 *
 * If no combination matches, `l` is not invoked; detail::AnyCTO_helper1
 * only asserts in that case. */
178template <class L, class... Fs, typename... CTOs>
179void AnyCTO ([[maybe_unused]] TypeList<CTOs...> list_of_compile_time_options,
180 std::array<int,sizeof...(CTOs)> const& runtime_options,
181 L&& l, Fs&&...cto_functs)
182{
183 detail::AnyCTO_helper1(std::forward<L>(l),
184 CartesianProduct(typename CTOs::list_type{}...),
185 runtime_options,
186 std::forward<Fs>(cto_functs)...);
187}
188
/* ParallelFor over a count N with run time options mapped to compile time
 * options.
 *
 * Listing line 191 (function name and first parameter) is not shown; the
 * index at the end of this page gives it as
 *   ParallelFor(TypeList<CTOs...> ctos, ...).
 *
 * Enabled only when T is an integral type; returns void. `f` is handed to
 * AnyCTO, which wraps it so that it is called with additional trailing
 * std::integral_constant<int, ...> arguments for the selected options. MT
 * is passed through as the template argument of the inner ParallelFor. */
189template <int MT, typename T, class F, typename... CTOs>
190std::enable_if_t<std::is_integral_v<T>>
192 std::array<int,sizeof...(CTOs)> const& runtime_options,
193 T N, F&& f)
194{
195 AnyCTO(ctos, runtime_options,
// The lambda refers to N by reference; cto_func is the wrapped functor.
196 [&](auto cto_func){
197 ParallelFor<MT>(N, cto_func);
198 },
199 std::forward<F>(f)
200 );
201}
202
/* Box overload of the compile-time-option launch with an explicit MT.
 *
 * NOTE(review): listing line 204 (return type, function name and first
 * parameter) is not shown. The body uses a parameter named `ctos`, and the
 * sibling overloads are named ParallelFor -- presumably this one is too;
 * confirm against the real header.
 *
 * `f` is handed to AnyCTO, which wraps it so that it is called with
 * additional trailing std::integral_constant<int, ...> arguments for the
 * selected options. MT is passed through as the template argument of the
 * inner ParallelFor. */
203template <int MT, class F, int dim, typename... CTOs>
205 std::array<int,sizeof...(CTOs)> const& runtime_options,
206 BoxND<dim> const& box, F&& f)
207{
208 AnyCTO(ctos, runtime_options,
// The lambda refers to box by reference; cto_func is the wrapped functor.
209 [&](auto cto_func){
210 ParallelFor<MT>(box, cto_func);
211 },
212 std::forward<F>(f)
213 );
214}
215
/* Box-plus-component-count overload of the compile-time-option launch with
 * an explicit MT.
 *
 * NOTE(review): listing line 218 (function name and first parameter) is not
 * shown. The body uses a parameter named `ctos`, and the sibling overloads
 * are named ParallelFor -- presumably this one is too; confirm against the
 * real header.
 *
 * Enabled only when T is an integral type; returns void. `f` is handed to
 * AnyCTO, which wraps it so that it is called with additional trailing
 * std::integral_constant<int, ...> arguments for the selected options. MT
 * is passed through as the template argument of the inner ParallelFor. */
216template <int MT, typename T, class F, int dim, typename... CTOs>
217std::enable_if_t<std::is_integral_v<T>>
219 std::array<int,sizeof...(CTOs)> const& runtime_options,
220 BoxND<dim> const& box, T ncomp, F&& f)
221{
222 AnyCTO(ctos, runtime_options,
// The lambda refers to box and ncomp by reference; cto_func is the wrapped
// functor.
223 [&](auto cto_func){
224 ParallelFor<MT>(box, ncomp, cto_func);
225 },
226 std::forward<F>(f)
227 );
228}
229
/* Count overload without an explicit MT: forwards all arguments to
 * ParallelFor<AMREX_GPU_MAX_THREADS>.
 *
 * NOTE(review): listing line 276 (function name and first parameter) is not
 * shown. The body uses a parameter named `ctos`; presumably the function is
 * named ParallelFor and takes TypeList<CTOs...> ctos first -- confirm
 * against the real header.
 *
 * Enabled only when T is an integral type; returns void. */
274template <typename T, class F, typename... CTOs>
275std::enable_if_t<std::is_integral_v<T>>
277 std::array<int,sizeof...(CTOs)> const& option,
278 T N, F&& f)
279{
280 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, N, std::forward<F>(f));
281}
282
/* Box overload without an explicit MT: forwards all arguments to
 * ParallelFor<AMREX_GPU_MAX_THREADS>.
 *
 * NOTE(review): listing line 329 (return type, function name and first
 * parameter) is not shown. The body uses a parameter named `ctos`;
 * presumably the function is named ParallelFor and takes
 * TypeList<CTOs...> ctos first -- confirm against the real header. */
328template <class F, int dim, typename... CTOs>
330 std::array<int,sizeof...(CTOs)> const& option,
331 BoxND<dim> const& box, F&& f)
332{
333 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, std::forward<F>(f));
334}
335
/* Box-plus-component-count overload without an explicit MT: forwards all
 * arguments to ParallelFor<AMREX_GPU_MAX_THREADS>.
 *
 * NOTE(review): listing line 384 (function name and first parameter) is not
 * shown. The body uses a parameter named `ctos`; presumably the function is
 * named ParallelFor and takes TypeList<CTOs...> ctos first -- confirm
 * against the real header.
 *
 * Enabled only when T is an integral type; returns void. */
382template <typename T, class F, int dim, typename... CTOs>
383std::enable_if_t<std::is_integral_v<T>>
385 std::array<int,sizeof...(CTOs)> const& option,
386 BoxND<dim> const& box, T ncomp, F&& f)
387{
388 ParallelFor<AMREX_GPU_MAX_THREADS>(ctos, option, box, ncomp, std::forward<F>(f));
389}
390
391}
392
393#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
A Rectangular Domain on an Integer Lattice.
Definition AMReX_Box.H:43
void AnyCTO_helper1(const L &l, TypeList< PPs... >, RO const &runtime_options, const Fs &...cto_functs)
Definition AMReX_CTOParallelForImpl.H:67
bool AnyCTO_helper2(const L &l, TypeList< As... >, std::array< int, sizeof...(As)> const &runtime_options, const Fs &...cto_functs)
Definition AMReX_CTOParallelForImpl.H:47
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
void AnyCTO(TypeList< CTOs... > list_of_compile_time_options, std::array< int, sizeof...(CTOs)> const &runtime_options, L &&l, Fs &&...cto_functs)
Compile time optimization of kernels with run time options.
Definition AMReX_CTOParallelForImpl.H:179
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:127
constexpr auto CartesianProduct(Ls...)
Cartesian Product of TypeLists.
Definition AMReX_TypeList.H:150
const int[]
Definition AMReX_BLProfiler.cpp:1664
Definition AMReX_FabArrayCommI.H:896
Definition AMReX_CTOParallelForImpl.H:20
Struct for holding types.
Definition AMReX_TypeList.H:12
Definition AMReX_CTOParallelForImpl.H:28
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto operator()(Args... args) const noexcept -> decltype(f(args..., std::integral_constant< int, ctr >{}...))
Definition AMReX_CTOParallelForImpl.H:33
F f
Definition AMReX_CTOParallelForImpl.H:29
AMREX_GPU_HOST_DEVICE static AMREX_FORCE_INLINE constexpr std::array< int, sizeof...(ctr)> GetOptions() noexcept
Definition AMReX_CTOParallelForImpl.H:40