Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_MFParallelForG.H
Go to the documentation of this file.
1#ifndef AMREX_MF_PARALLEL_FOR_G_H_
2#define AMREX_MF_PARALLEL_FOR_G_H_
3#include <AMReX_Config.H>
4
5#ifdef AMREX_USE_GPU
6
7#include <AMReX_Concepts.H>
8#include <algorithm>
9#include <cmath>
10#include <limits>
11
13namespace amrex::detail {
14
15inline
16void build_par_for_boxes (char*& hp, BoxIndexer*& pboxes, Vector<Box> const& boxes)
17{
18 if (boxes.empty()) { return; }
19 const int nboxes = boxes.size();
20 const std::size_t nbytes = nboxes*sizeof(BoxIndexer);
21 hp = (char*)The_Pinned_Arena()->alloc(nbytes);
22 auto* hp_boxes = (BoxIndexer*)hp;
23 for (int i = 0; i < nboxes; ++i) {
24 new (hp_boxes+i) BoxIndexer(boxes[i]);
25 }
26
27 auto dp = (char*) The_Arena()->alloc(nbytes);
28 Gpu::htod_memcpy_async(dp, hp, nbytes);
29 pboxes = (BoxIndexer*)dp;
30}
31
32inline
33void destroy_par_for_boxes (char* hp, char* dp)
34{
36 The_Arena()->free(dp);
37}
38
39namespace parfor_mf_detail {
40 template <typename F>
42 auto call_f (F const& f, int b, int i, int j, int k, int) noexcept
43 -> decltype(f(0,0,0,0))
44 {
45 f(b,i,j,k);
46 }
47
48 template <typename F>
50 auto call_f (F const& f, int b, int i, int j, int k, int ncomp) noexcept
51 -> decltype(f(0,0,0,0,0))
52 {
53 for (int n = 0; n < ncomp; ++n) {
54 f(b,i,j,k,n);
55 }
56 }
57}
58
59template <int MT, FabArrayType MF, typename F>
60void
61ParallelFor_doit (MF const& mf, IntVect const& nghost, int ncomp, IntVect const&, bool, F const& f)
62{
63 const auto& index_array = mf.IndexArray();
64 const int nboxes = index_array.size();
65
66 if (nboxes == 0) {
67 return;
68 } else if (nboxes == 1) {
69 Box const& b = amrex::grow(mf.box(index_array[0]), nghost);
70 amrex::ParallelFor(b, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
71 {
72 parfor_mf_detail::call_f(f, 0, i, j, k, ncomp);
73 });
74 } else {
75 auto const& parforinfo = mf.getParForInfo(nghost);
76 auto nblocks_per_box = parforinfo.getNBlocksPerBox(MT);
77 AMREX_ASSERT(Long(nblocks_per_box)*Long(nboxes) < Long(std::numeric_limits<int>::max()));
78 const int nblocks = nblocks_per_box * nboxes;
79 const BoxIndexer* dp_boxes = parforinfo.getBoxes();
80
81#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
82
83 amrex::launch_global<MT>
84 <<<nblocks, MT, 0, Gpu::gpuStream()>>>
85 ([=] AMREX_GPU_DEVICE () noexcept
86 {
87 int ibox = int(blockIdx.x) / nblocks_per_box;
88 auto icell = std::uint64_t(blockIdx.x-ibox*nblocks_per_box)*MT + threadIdx.x;
89
90#elif defined(AMREX_USE_SYCL)
91
92 amrex::launch<MT>(nblocks, Gpu::gpuStream(),
93 [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) noexcept
94 {
95 int blockIdxx = item.get_group_linear_id();
96 int threadIdxx = item.get_local_linear_id();
97 int ibox = int(blockIdxx) / nblocks_per_box;
98 auto icell = std::uint64_t(blockIdxx-ibox*nblocks_per_box)*MT + threadIdxx;
99#endif
100 BoxIndexer const& indexer = dp_boxes[ibox];
101 if (icell < indexer.numPts()) {
102 auto [i, j, k] = indexer(icell);
103 parfor_mf_detail::call_f(f, ibox, i, j, k, ncomp);
104 }
105 });
106 }
108}
109
110template <FabArrayType MF, typename F>
111void
112ParallelFor_doit (MF const& mf, IntVect const& nghost, int ncomp, IntVect const& ts, bool dynamic, F&& f)
113{
114#ifdef AMREX_USE_CUDA
115 constexpr int MT = 128;
116#else
117 constexpr int MT = AMREX_GPU_MAX_THREADS;
118#endif
119 ParallelFor_doit<MT>(mf, nghost, ncomp, ts, dynamic, std::forward<F>(f));
120}
121
122template <FabArrayType MF, typename F>
123void
124ParallelFor_doit (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynamic, F&& f)
125{
126#ifdef AMREX_USE_CUDA
127 constexpr int MT = 128;
128#else
129 constexpr int MT = AMREX_GPU_MAX_THREADS;
130#endif
131 ParallelFor_doit<MT>(mf, nghost, 1, ts, dynamic, std::forward<F>(f));
132}
133
134}
136
137#endif
138#endif
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38
#define AMREX_GPU_ERROR_CHECK()
Definition AMReX_GpuError.H:151
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18
virtual void free(void *pt)=0
A pure virtual function for deleting the arena pointed to by pt.
virtual void * alloc(std::size_t sz)=0
amrex_long Long
Definition AMReX_INT.H:30
__host__ __device__ BoxND< dim > grow(const BoxND< dim > &b, int i) noexcept
Grow BoxND in all directions by given amount.
Definition AMReX_Box.H:1289
Arena * The_Pinned_Arena()
Definition AMReX_Arena.cpp:860
Arena * The_Arena()
Definition AMReX_Arena.cpp:820
void htod_memcpy_async(void *p_d, const void *p_h, const std::size_t sz) noexcept
Definition AMReX_GpuDevice.H:421
gpuStream_t gpuStream() noexcept
Definition AMReX_GpuDevice.H:291
void ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition AMReX_CTOParallelForImpl.H:202
BoxND< 3 > Box
Box is an alias for amrex::BoxND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:30
BoxIndexerND< 3 > BoxIndexer
Definition AMReX_Box.H:2242
IntVectND< 3 > IntVect
IntVect is an alias for amrex::IntVectND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:33
const int[]
Definition AMReX_BLProfiler.cpp:1664