Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_SIMD.H
Go to the documentation of this file.
1#ifndef AMREX_SIMD_H_
2#define AMREX_SIMD_H_
3
4#include <AMReX_Config.H>
5
6#include <AMReX.H> // amrex::ignore_unused
7#include <AMReX_REAL.H>
8
9#ifdef AMREX_USE_SIMD
10// TODO make SIMD provider configurable: VIR (C++17 TS2) or C++26 (later)
11# include <vir/simd.h> // includes SIMD TS2 header <experimental/simd>
12# if __cplusplus >= 202002L
13# include <vir/simd_cvt.h>
14# endif
15#endif
16
17#include <cstdint>
18#include <type_traits>
19
20
21namespace amrex::simd
22{
23 // TODO make SIMD provider configurable: VIR (C++17 TS2) or C++26 (later)
24 // for https://en.cppreference.com/w/cpp/experimental/simd/simd_cast.html
25 namespace stdx {
26#ifdef AMREX_USE_SIMD
27 using namespace vir::stdx;
28# if __cplusplus >= 202002L
29 using vir::cvt;
30# endif
31
58 template <typename T, typename Abi>
60 vir::stdx::simd<T, Abi> select (
61 typename vir::stdx::simd<T, Abi>::mask_type const& mask,
62 vir::stdx::simd<T, Abi> const& true_val,
63 vir::stdx::simd<T, Abi> const& false_val)
64 {
65 vir::stdx::simd<T, Abi> result = false_val;
66 where(mask, result) = true_val;
67 return result;
68 }
69#else
70 // fallback implementations for functions that are commonly used in portable code paths
71
/**
 * Scalar counterpart of the SIMD mask reduction `any_of`.
 *
 * With a single boolean standing in for a mask, "any lane set" is simply
 * the value itself. This lets code written against the SIMD interface
 * compile unchanged when AMREX_USE_SIMD is not defined.
 *
 * @param v the scalar "mask"
 * @return `v`
 */
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
bool any_of (bool const v) { return v; }
85
namespace detail {
    /**
     * Proxy object returned by the scalar where().
     *
     * Holds a boolean mask and a pointer to the target value; assigning to
     * the proxy writes through to the target only when the mask is true.
     *
     * @tparam T type of the guarded value
     */
    template <typename T>
    struct where_expression {
        bool mask;  // the assignment is performed only if this is true
        T* value;   // target written by operator=

        /**
         * Conditionally assign `new_val` to the guarded value.
         *
         * @param new_val value stored in the target when the mask is true
         * @return reference to this proxy
         */
        // NOTE(review): qualifier line restored by analogy with where(),
        // which hands this proxy out; confirm against upstream.
        AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
        where_expression& operator= (T const& new_val)
        {
            if (mask) { *value = new_val; }
            return *this;
        }
    };
}

/**
 * Scalar counterpart of the SIMD where-expression.
 *
 * Usage mirrors the vector form:
 *     where(mask, value) = new_value;
 * which assigns `new_value` to `value` only if `mask` is true.
 *
 * @tparam T type of the guarded value
 * @param mask  condition guarding the assignment
 * @param value object that may be assigned to; the returned proxy stores
 *              its address, so it must outlive the proxy
 * @return proxy that performs the conditional assignment
 */
template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
detail::where_expression<T> where (bool const mask, T& value)
{
    return {mask, &value};
}
124
/**
 * Scalar counterpart of the vectorized ternary operator.
 *
 * @tparam T value type
 * @param mask      condition
 * @param true_val  value returned when `mask` is true
 * @param false_val value returned when `mask` is false
 * @return a copy of the chosen value
 */
template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T select (bool const mask, T const& true_val, T const& false_val)
{
    return mask ? true_val : false_val;
}
136#endif
137 }
138
139 // TODO: move to AMReX_REAL.H?
140
141#ifdef AMREX_USE_SIMD
142 // TODO: not sure why std::experimental::simd_abi::native<T> does not work, so we use this long version
143 constexpr auto native_simd_size_real = stdx::native_simd<amrex::Real>::size();
144 constexpr auto native_simd_size_particlereal = stdx::native_simd<amrex::ParticleReal>::size();
145
146 // Note: to make use of not only vector registers but also ILP, user might want to use * 2 or more of the native size
147 // for selected compute kernels.
148 // TODO Check if a default with * 2 or similar is sensible.
149 template<int SIMD_WIDTH = native_simd_size_real>
150 using SIMDReal = stdx::fixed_size_simd<amrex::Real, SIMD_WIDTH>;
151
152 template<int SIMD_WIDTH = native_simd_size_particlereal>
153 using SIMDParticleReal = stdx::fixed_size_simd<amrex::ParticleReal, SIMD_WIDTH>;
154
155 // Type that has the same number of int SIMD elements as the SIMDParticleReal type
156 template<typename T_ParticleReal = SIMDParticleReal<>>
157 using SIMDInt = stdx::rebind_simd_t<int, T_ParticleReal>;
158
159 // Type that has the same number of IdCpu SIMD elements as the SIMDParticleReal type
160 template<typename T_ParticleReal = SIMDParticleReal<>>
161 using SIMDIdCpu = stdx::rebind_simd_t<std::uint64_t, T_ParticleReal>;
162#else
// Scalar build: there is no SIMD provider, so every width is 1 and each
// "SIMD" alias collapses to its plain element type. The template parameters
// are kept so that code naming these aliases compiles in both build modes.
constexpr auto native_simd_size_real = 1;
constexpr auto native_simd_size_particlereal = 1;

// Stand-in for the SIMD vector of amrex::Real (SIMD_WIDTH is ignored).
template<int SIMD_WIDTH = native_simd_size_real>
using SIMDReal = amrex::Real;

// Stand-in for the SIMD vector of amrex::ParticleReal (SIMD_WIDTH is ignored).
template<int SIMD_WIDTH = native_simd_size_particlereal>
using SIMDParticleReal = amrex::ParticleReal;

// Type that has the same number of int SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDInt = int;

// Type that has the same number of IdCpu SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDIdCpu = std::uint64_t;
179#endif
180
namespace detail {
    // Empty tag base; is_vectorized detects types by derivation from it.
    struct InternalVectorized {};
}

/**
 * Tag type carrying a compile-time SIMD width.
 *
 * Any type deriving from this template is recognised by is_vectorized.
 *
 * @tparam SIMD_WIDTH width exposed as `simd_width`; defaults to
 *                    native_simd_size_real
 */
template<int SIMD_WIDTH = native_simd_size_real>
struct Vectorized : detail::InternalVectorized
{
    // the width this tag was instantiated with
    static constexpr int simd_width = SIMD_WIDTH;
};

/**
 * True if T is, or derives from, detail::InternalVectorized
 * (for example any Vectorized<N>).
 *
 * @tparam T type to inspect
 */
template<typename T>
constexpr bool is_vectorized = std::is_base_of_v<detail::InternalVectorized, T>;
227
namespace detail {
    /**
     * Shared implementation of is_nth_arg_non_const.
     *
     * Builds one flag per parameter type: true when the type, with any
     * reference removed, is not const-qualified.
     *
     * @tparam Args parameter types of the inspected signature
     * @param n zero-based parameter index; must be < sizeof...(Args)
     */
    template<typename... Args>
    constexpr bool nth_arg_non_const (int n)
    {
        constexpr bool val_arr[sizeof...(Args)] {!std::is_const_v<std::remove_reference_t<Args>>...};
        return val_arr[n];
    }
}

/**
 * Check whether the n-th parameter of a callable is non-const.
 *
 * Only the const-ness of the parameter type after stripping a reference is
 * inspected: `T const&` is const, `T&` is non-const. By-value parameters and
 * pointer parameters (including pointers to const) therefore count as
 * non-const.
 *
 * Overload for free-function pointers.
 *
 * @param n zero-based parameter index; must be smaller than the number of
 *          parameters of the callable
 * @return true if the n-th parameter is non-const
 */
template<typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(*)(Args...), int n)
{
    return detail::nth_arg_non_const<Args...>(n);
}

// same for functors (const/non-const ::operator() members)
template<typename C, typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(C::*)(Args...), int n)
{
    return detail::nth_arg_non_const<Args...>(n);
}

template<typename C, typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(C::*)(Args...) const, int n)
{
    return detail::nth_arg_non_const<Args...>(n);
}
267
/**
 * Load one element, or one SIMD vector of elements, from a 1D array.
 *
 * The index type selects the behaviour at compile time:
 *  - integral index: yields `ptr[i]`;
 *  - index object with `IndexType::width == 1`: yields `ptr[i.index]`;
 *  - index object with a larger width (SIMD builds only): copies
 *    `IndexType::width` consecutive elements starting at `ptr[i.index]`
 *    into a fixed-size SIMD vector.
 *
 * Because the return type is `decltype(auto)`, the two scalar paths return
 * a reference to the array element, while the SIMD path returns the vector
 * by value.
 *
 * @tparam T         element type of the array
 * @tparam IndexType integral type, or a type providing a static `width`
 *                   and an `index` member
 * @param ptr base pointer of the array
 * @param i   index of the (first) element to load
 * @return reference to the element, or a SIMD vector of loaded elements
 */
template <typename T, typename IndexType>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
decltype(auto) load_1d (T* ptr, IndexType const i)
{
    if constexpr (std::is_integral_v<IndexType>) {
        return ptr[i];
    } else if constexpr (IndexType::width == 1) {
        return ptr[i.index];
    } else {
#ifdef AMREX_USE_SIMD
        using DataType = stdx::fixed_size_simd<std::decay_t<T>, IndexType::width>;

        // initialize vector register
        // TODO stdx::vector_aligned needs alignment guarantees
        //      https://github.com/AMReX-Codes/amrex/issues/4592
        //      https://en.cppreference.com/w/cpp/experimental/simd/simd/copy_from
        DataType val;
        val.copy_from(&ptr[i.index], stdx::element_aligned);
        return val;

#else
        // only reached for width != 1, so this always fires with a readable message
        static_assert(IndexType::width == 1, "SIMD width must be 1 for non-SIMD builds");
        return ptr[i.index];
#endif
    }
}
307
337 template <auto P_Method, int N, bool ForceWriteback = false,
338 typename T, typename IndexType, typename ValType>
340 void store_1d (
341 ValType const & AMREX_RESTRICT val,
342 T * const AMREX_RESTRICT ptr,
343 IndexType const i
344 )
345 {
346 // SIMD uses special vector register types in ValType that need to be copied back to RAM array type T
347 if constexpr (!std::is_same_v<ValType, T>) {
348 if constexpr (ForceWriteback || amrex::simd::is_nth_arg_non_const(P_Method, N)) {
349#ifdef AMREX_USE_SIMD
350 // write back to memory
351 // TODO stdx::vector_aligned needs alignment guarantees
352 // https://github.com/AMReX-Codes/amrex/issues/4592
353 // https://en.cppreference.com/w/cpp/experimental/simd/simd/copy_from
354 val.copy_to(&ptr[i.index], amrex::simd::stdx::element_aligned);
355#else
356 amrex::Abort("store_1d: ValType val must alias T ptr data (to make this a no-OP).");
357#endif
358 }
359 }
360 amrex::ignore_unused(val, ptr, i);
361 }
362
363} // namespace amrex::simd
364
365#endif
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_RESTRICT
Definition AMReX_Extension.H:32
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
Array4< int const > mask
Definition AMReX_InterpFaceRegister.cpp:93
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
__host__ __device__ bool any_of(bool const v)
Definition AMReX_SIMD.H:84
__host__ __device__ detail::where_expression< T > where(bool const mask, T &value)
Definition AMReX_SIMD.H:120
__host__ __device__ T select(bool const mask, T const &true_val, T const &false_val)
Definition AMReX_SIMD.H:132
Definition AMReX_SIMD.H:22
__host__ __device__ void store_1d(ValType const &__restrict__ val, T *const __restrict__ ptr, IndexType const i)
Definition AMReX_SIMD.H:340
std::uint64_t SIMDIdCpu
Definition AMReX_SIMD.H:178
constexpr auto native_simd_size_real
Definition AMReX_SIMD.H:163
int SIMDInt
Definition AMReX_SIMD.H:174
constexpr bool is_vectorized
Definition AMReX_SIMD.H:226
constexpr bool is_nth_arg_non_const(R(*)(Args...), int n)
Definition AMReX_SIMD.H:249
amrex::ParticleReal SIMDParticleReal
Definition AMReX_SIMD.H:170
__host__ __device__ decltype(auto) load_1d(T *ptr, IndexType const i)
Definition AMReX_SIMD.H:283
constexpr auto native_simd_size_particlereal
Definition AMReX_SIMD.H:164
amrex::Real SIMDReal
Definition AMReX_SIMD.H:167
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
IndexTypeND< 3 > IndexType
IndexType is an alias for amrex::IndexTypeND instantiated with AMREX_SPACEDIM.
Definition AMReX_BaseFwd.H:36
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:241
const int[]
Definition AMReX_BLProfiler.cpp:1664
Definition AMReX_SIMD.H:216