Block-Structured AMR Software Framework
 
AMReX_MFParallelForG.H
#ifndef AMREX_MF_PARALLEL_FOR_G_H_
#define AMREX_MF_PARALLEL_FOR_G_H_
#include <AMReX_Config.H>

#ifdef AMREX_USE_GPU

#include <algorithm>
#include <cmath>
#include <limits>

namespace amrex::detail {

// Build a BoxIndexer for every box: fill a pinned host buffer, then stage an
// asynchronous copy of it to the device. On return, hp owns the pinned host
// buffer (kept alive while the copy is in flight) and pboxes points to the
// device copy.
inline
void build_par_for_boxes (char*& hp, BoxIndexer*& pboxes, Vector<Box> const& boxes)
{
    if (boxes.empty()) { return; }
    const int nboxes = boxes.size();
    const std::size_t nbytes = nboxes*sizeof(BoxIndexer);
    hp = (char*)The_Pinned_Arena()->alloc(nbytes);
    auto* hp_boxes = (BoxIndexer*)hp;
    for (int i = 0; i < nboxes; ++i) {
        new (hp_boxes+i) BoxIndexer(boxes[i]);
    }

    auto dp = (char*) The_Arena()->alloc(nbytes);
    Gpu::htod_memcpy_async(dp, hp, nbytes);
    pboxes = (BoxIndexer*)dp;
}

// Free both buffers created by build_par_for_boxes.
inline
void destroy_par_for_boxes (char* hp, char* dp)
{
    The_Pinned_Arena()->free(hp);
    The_Arena()->free(dp);
}

// The trailing return types select an overload by SFINAE: a callable taking
// (box_no,i,j,k) gets the first call_f, while one taking (box_no,i,j,k,n)
// gets the second, which loops over the ncomp components.
namespace parfor_mf_detail {
    template <typename F>
    AMREX_GPU_DEVICE
    auto call_f (F const& f, int b, int i, int j, int k, int) noexcept
        -> decltype(f(0,0,0,0))
    {
        f(b,i,j,k);
    }

    template <typename F>
    AMREX_GPU_DEVICE
    auto call_f (F const& f, int b, int i, int j, int k, int ncomp) noexcept
        -> decltype(f(0,0,0,0,0))
    {
        for (int n = 0; n < ncomp; ++n) {
            f(b,i,j,k,n);
        }
    }
}

// Run f over every cell (grown by nghost) of every box owned by this process.
// A single box uses the ordinary Box ParallelFor; multiple boxes share one
// fused kernel launch in which each box is assigned nblocks_per_box thread
// blocks and the flat block index is decoded back into (box, cell). The
// unnamed tile-size and dynamic arguments are ignored on the GPU path.
template <int MT, typename MF, typename F>
std::enable_if_t<IsFabArray<MF>::value>
ParallelFor_doit (MF const& mf, IntVect const& nghost, int ncomp, IntVect const&, bool, F const& f)
{
    const auto& index_array = mf.IndexArray();
    const int nboxes = index_array.size();

    if (nboxes == 0) {
        return;
    } else if (nboxes == 1) {
        Box const& b = amrex::grow(mf.box(index_array[0]), nghost);
        amrex::ParallelFor(b, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
        {
            parfor_mf_detail::call_f(f, 0, i, j, k, ncomp);
        });
    } else {
        auto const& parforinfo = mf.getParForInfo(nghost);
        auto nblocks_per_box = parforinfo.getNBlocksPerBox(MT);
        AMREX_ASSERT(Long(nblocks_per_box)*Long(nboxes) < Long(std::numeric_limits<int>::max()));
        const int nblocks = nblocks_per_box * nboxes;
        const BoxIndexer* dp_boxes = parforinfo.getBoxes();

#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)

        amrex::launch_global<MT>
            <<<nblocks, MT, 0, Gpu::gpuStream()>>>
            ([=] AMREX_GPU_DEVICE () noexcept
        {
            int ibox = int(blockIdx.x) / nblocks_per_box;
            auto icell = std::uint64_t(blockIdx.x-ibox*nblocks_per_box)*MT + threadIdx.x;

#elif defined(AMREX_USE_SYCL)

        amrex::launch<MT>(nblocks, Gpu::gpuStream(),
            [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) noexcept
        {
            int blockIdxx = item.get_group_linear_id();
            int threadIdxx = item.get_local_linear_id();
            int ibox = int(blockIdxx) / nblocks_per_box;
            auto icell = std::uint64_t(blockIdxx-ibox*nblocks_per_box)*MT + threadIdxx;
#endif
            // The last block of each box may run past that box's cell count; skip those threads.
            BoxIndexer const& indexer = dp_boxes[ibox];
            if (icell < indexer.numPts()) {
                auto [i, j, k] = indexer(icell);
                parfor_mf_detail::call_f(f, ibox, i, j, k, ncomp);
            }
        });
    }
    AMREX_GPU_ERROR_CHECK();
}

// Select the threads-per-block count (128 on CUDA, otherwise the configured
// maximum) and forward to the templated kernel above.
template <typename MF, typename F>
std::enable_if_t<IsFabArray<MF>::value>
ParallelFor_doit (MF const& mf, IntVect const& nghost, int ncomp, IntVect const& ts, bool dynamic, F&& f)
{
#ifdef AMREX_USE_CUDA
    constexpr int MT = 128;
#else
    constexpr int MT = AMREX_GPU_MAX_THREADS;
#endif
    ParallelFor_doit<MT>(mf, nghost, ncomp, ts, dynamic, std::forward<F>(f));
}

// Same as above, but for functors without a component index: ncomp defaults to 1.
template <typename MF, typename F>
std::enable_if_t<IsFabArray<MF>::value>
ParallelFor_doit (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynamic, F&& f)
{
#ifdef AMREX_USE_CUDA
    constexpr int MT = 128;
#else
    constexpr int MT = AMREX_GPU_MAX_THREADS;
#endif
    ParallelFor_doit<MT>(mf, nghost, 1, ts, dynamic, std::forward<F>(f));
}

}

#endif
#endif
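
For context, the ParallelFor_doit overloads above are what the MultiFab-level amrex::ParallelFor (declared in AMReX_MFParallelFor.H) dispatches to when AMReX is built with GPU support. The following is a minimal usage sketch, not part of this header: it assumes AMReX has been initialized and that a valid MultiFab already exists, the helper name scale_mf is made up for illustration, and the exact set of available ParallelFor overloads should be checked against AMReX_MFParallelFor.H.

// Hypothetical helper (not part of AMReX): touch every cell of a MultiFab,
// including ghost cells, with fused GPU launches.
#include <AMReX_MultiFab.H>
#include <AMReX_MFParallelFor.H>
#include <AMReX_Gpu.H>

void scale_mf (amrex::MultiFab& mf, amrex::Real factor)
{
    auto const& ma = mf.arrays();   // MultiArray4, indexable by local box number in device code

    // 4-argument lambda: matches the f(b,i,j,k) overload of call_f (no component loop).
    amrex::ParallelFor(mf, amrex::IntVect(0),
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
        {
            ma[box_no](i,j,k,0) += 1.0;
        });

    // 5-argument lambda: matches the f(b,i,j,k,n) overload; the component loop
    // runs inside the single kernel launched by ParallelFor_doit.
    amrex::ParallelFor(mf, mf.nGrowVect(), mf.nComp(),
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            ma[box_no](i,j,k,n) *= factor;
        });

    amrex::Gpu::streamSynchronize();  // launches are asynchronous; wait before using mf on the host
}
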
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_ERROR_CHECK()
Definition AMReX_GpuError.H:133
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1280
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:193
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:27
BoxIndexerND< 3 > BoxIndexer
Definition AMReX_Box.H:2224
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:823
Arena * The_Arena()
Definition AMReX_Arena.cpp:783