AMReX: Block-Structured AMR Software Framework
 
AMReX_MFParallelForG.H
1#ifndef AMREX_MF_PARALLEL_FOR_G_H_
2#define AMREX_MF_PARALLEL_FOR_G_H_
3#include <AMReX_Config.H>
4
5#ifdef AMREX_USE_GPU
6
7#include <algorithm>
8#include <cmath>
9#include <limits>
10
11namespace amrex {
12namespace detail {
13
14inline
15void build_par_for_boxes (char*& hp, BoxIndexer*& pboxes, Vector<Box> const& boxes)
16{
17    if (boxes.empty()) { return; }
18    const int nboxes = boxes.size();
19    const std::size_t nbytes = nboxes*sizeof(BoxIndexer);
20    hp = (char*)The_Pinned_Arena()->alloc(nbytes);
21    auto* hp_boxes = (BoxIndexer*)hp;
22    for (int i = 0; i < nboxes; ++i) {
23        new (hp_boxes+i) BoxIndexer(boxes[i]);
24    }
25
26    auto dp = (char*) The_Arena()->alloc(nbytes);
27    Gpu::htod_memcpy_async(dp, hp, nbytes);
28    pboxes = (BoxIndexer*)dp;
29}
30
31inline
32void destroy_par_for_boxes (char* hp, char* dp)
33{
35 The_Arena()->free(dp);
36}
37}
38
39namespace experimental::detail {
40
41namespace parfor_mf_detail {
42    template <typename F>
43    AMREX_GPU_DEVICE
44    auto call_f (F const& f, int b, int i, int j, int k, int) noexcept
45        -> decltype(f(0,0,0,0))
46    {
47        f(b,i,j,k);
48    }
49
50    template <typename F>
51    AMREX_GPU_DEVICE
52    auto call_f (F const& f, int b, int i, int j, int k, int ncomp) noexcept
53        -> decltype(f(0,0,0,0,0))
54    {
55        for (int n = 0; n < ncomp; ++n) {
56            f(b,i,j,k,n);
57        }
58    }
59}
60
61template <int MT, typename MF, typename F>
62std::enable_if_t<IsFabArray<MF>::value>
63ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const&, bool, F const& f)
64{
65    const auto& index_array = mf.IndexArray();
66    const int nboxes = index_array.size();
67
68    if (nboxes == 0) {
69        return;
70    } else if (nboxes == 1) {
71        Box const& b = amrex::grow(mf.box(index_array[0]), nghost);
72        amrex::ParallelFor(b, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
73        {
74            parfor_mf_detail::call_f(f, 0, i, j, k, ncomp);
75        });
76    } else {
77        auto const& parforinfo = mf.getParForInfo(nghost);
78        auto nblocks_per_box = parforinfo.getNBlocksPerBox(MT);
79        AMREX_ASSERT(Long(nblocks_per_box)*Long(nboxes) < Long(std::numeric_limits<int>::max()));
80        const int nblocks = nblocks_per_box * nboxes;
81        const BoxIndexer* dp_boxes = parforinfo.getBoxes();
82
83#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
84
85        amrex::launch_global<MT>
86            <<<nblocks, MT, 0, Gpu::gpuStream()>>>
87            ([=] AMREX_GPU_DEVICE () noexcept
88        {
89            int ibox = int(blockIdx.x) / nblocks_per_box;
90            auto icell = std::uint64_t(blockIdx.x-ibox*nblocks_per_box)*MT + threadIdx.x;
91
92#elif defined(AMREX_USE_SYCL)
93
94        amrex::launch<MT>(nblocks, Gpu::gpuStream(),
95            [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) noexcept
96        {
97            int blockIdxx = item.get_group_linear_id();
98            int threadIdxx = item.get_local_linear_id();
99            int ibox = int(blockIdxx) / nblocks_per_box;
100            auto icell = std::uint64_t(blockIdxx-ibox*nblocks_per_box)*MT + threadIdxx;
101#endif
102            BoxIndexer const& indexer = dp_boxes[ibox];
103            if (icell < indexer.numPts()) {
104                auto [i, j, k] = indexer(icell);
105                parfor_mf_detail::call_f(f, ibox, i, j, k, ncomp);
106            }
107        });
108    }
109    AMREX_GPU_ERROR_CHECK();
110}
111
112template <typename MF, typename F>
113std::enable_if_t<IsFabArray<MF>::value>
114ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const& ts, bool dynamic, F&& f)
115{
116#ifdef AMREX_USE_CUDA
117    constexpr int MT = 128;
118#else
119    constexpr int MT = AMREX_GPU_MAX_THREADS;
120#endif
121    ParallelFor<MT>(mf, nghost, ncomp, ts, dynamic, std::forward<F>(f));
122}
123
124template <typename MF, typename F>
125std::enable_if_t<IsFabArray<MF>::value>
126ParallelFor (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynamic, F&& f)
127{
128#ifdef AMREX_USE_CUDA
129    constexpr int MT = 128;
130#else
131    constexpr int MT = AMREX_GPU_MAX_THREADS;
132#endif
133    ParallelFor<MT>(mf, nghost, 1, ts, dynamic, std::forward<F>(f));
134}
135
136}
137
138}
139
140#endif
141#endif
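
The kernels in this file are normally reached through the public MultiFab ParallelFor wrappers declared in AMReX_MFParallelFor.H rather than called directly. Below is a minimal usage sketch, not part of this header; the function name fill_ones is made up for illustration. The functor receives the local box number plus the cell and component indices, matching the call_f dispatch above:

#include <AMReX_MultiFab.H>
#include <AMReX_MFParallelFor.H>
#include <AMReX_Gpu.H>

void fill_ones (amrex::MultiFab& mf) // hypothetical helper for illustration
{
    auto const& arrs = mf.arrays(); // one device-accessible Array4 per local box
    amrex::ParallelFor(mf, amrex::IntVect(0), mf.nComp(),
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
        {
            // box_no selects the Array4 of the box this thread was mapped to
            arrs[box_no](i,j,k,n) = amrex::Real(1.0);
        });
    amrex::Gpu::streamSynchronize(); // GPU launches above are asynchronous
}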
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_ERROR_CHECK()
Definition AMReX_GpuError.H:133
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
This class is a thin wrapper around std::vector. Unlike vector, Vector::operator[] provides bound checking when compiled with DEBUG=TRUE.
Definition AMReX_Vector.H:28
Long size() const noexcept
Definition AMReX_Vector.H:53
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:289
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:241
void destroy_par_for_boxes(char *hp, char *dp)
Definition AMReX_MFParallelForG.H:32
void build_par_for_boxes(char *&hp, BoxIndexer *&pboxes, Vector< Box > const &boxes)
Definition AMReX_MFParallelForG.H:15
AMREX_GPU_DEVICE auto call_f(F const &f, int b, int i, int j, int k, int) noexcept -> decltype(f(0, 0, 0, 0))
Definition AMReX_MFParallelForG.H:44
Definition AMReX_Amr.cpp:49
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:191
BoxIndexerND< 3 > BoxIndexer
Definition AMReX_Box.H:2117
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:745
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1229
Arena * The_Arena()
Definition AMReX_Arena.cpp:705
Definition AMReX_FabArrayCommI.H:1000
Definition AMReX_Box.H:2045
__host__ __device__ std::uint64_t numPts() const
Definition AMReX_Box.H:2086
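
The detail::build_par_for_boxes / destroy_par_for_boxes pair referenced above manages two matching allocations: a pinned host staging buffer and its device mirror, filled through an asynchronous host-to-device copy. A sketch of the expected pairing at a hypothetical call site (the real caller lives elsewhere in AMReX; the function name here is made up for illustration):

#include <AMReX_MFParallelForG.H>
#include <AMReX_Gpu.H>

void with_box_indexers (amrex::Vector<amrex::Box> const& boxes) // hypothetical
{
    char* hp = nullptr;                    // pinned host staging buffer
    amrex::BoxIndexer* d_boxes = nullptr;  // device mirror read inside kernels
    amrex::detail::build_par_for_boxes(hp, d_boxes, boxes);
    // ... launch kernels that index cells through d_boxes ...
    amrex::Gpu::streamSynchronize();       // the htod copy and kernels are async
    amrex::detail::destroy_par_for_boxes(hp, (char*)d_boxes);
}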