Block-Structured AMR Software Framework
Loading...
Searching...
No Matches
AMReX_SIMD.H
Go to the documentation of this file.
1#ifndef AMREX_SIMD_H_
2#define AMREX_SIMD_H_
3
4#include <AMReX_Config.H>
5
6#include <AMReX.H> // amrex::ignore_unused
7#include <AMReX_REAL.H>
8
9#ifdef AMREX_USE_SIMD
10// TODO make SIMD provider configurable: VIR (C++17 TS2) or C++26 (later)
11# include <vir/simd.h> // includes SIMD TS2 header <experimental/simd>
12# if __cplusplus >= 202002L
13# include <vir/simd_cvt.h>
14# endif
15#endif
16
17#include <cstdint>
18#include <type_traits>
19
20
21namespace amrex::simd
22{
23 // TODO make SIMD provider configurable: VIR (C++17 TS2) or C++26 (later)
24 // for https://en.cppreference.com/w/cpp/experimental/simd/simd_cast.html
/**
 * Alias namespace for the SIMD provider used by amrex::simd.
 *
 * With AMREX_USE_SIMD defined, all names of vir::stdx are made visible here
 * (plus vir::cvt when compiling as C++20 or newer). Without it, only scalar
 * fallbacks for functions that portable code paths commonly call are provided.
 */
namespace stdx {
#ifdef AMREX_USE_SIMD
    using namespace vir::stdx;
#   if __cplusplus >= 202002L
    using vir::cvt;
#   endif
#else
    // fallback implementations for functions that are commonly used in portable code paths

    /**
     * Scalar fallback for any_of: a single bool is treated as a one-element mask.
     *
     * Marked host/device and inline because this is a function definition in a
     * header that is also used from GPU code paths.
     *
     * @param v the value to test
     * @return v, unchanged
     */
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    bool any_of (bool const v) { return v; }
#endif
}
38
39 // TODO: move to AMReX_REAL.H?
40
#ifdef AMREX_USE_SIMD
// TODO: not sure why std::experimental::simd_abi::native<T> does not work, so we use this long version
/** Element count of stdx::native_simd<amrex::Real>. */
constexpr auto native_simd_size_real = stdx::native_simd<amrex::Real>::size();
/** Element count of stdx::native_simd<amrex::ParticleReal>. */
constexpr auto native_simd_size_particlereal = stdx::native_simd<amrex::ParticleReal>::size();

// Note: to make use of not only vector registers but also ILP, user might want to use * 2 or more of the native size
//       for selected compute kernels.
// TODO Check if a default with * 2 or similar is sensible.

/** Fixed-size SIMD vector of amrex::Real with SIMD_WIDTH elements. */
template<int SIMD_WIDTH = native_simd_size_real>
using SIMDReal = stdx::fixed_size_simd<amrex::Real, SIMD_WIDTH>;

/** Fixed-size SIMD vector of amrex::ParticleReal with SIMD_WIDTH elements. */
template<int SIMD_WIDTH = native_simd_size_particlereal>
using SIMDParticleReal = stdx::fixed_size_simd<amrex::ParticleReal, SIMD_WIDTH>;

// Type that has the same number of int SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDInt = stdx::rebind_simd_t<int, T_ParticleReal>;

// Type that has the same number of IdCpu SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDIdCpu = stdx::rebind_simd_t<std::uint64_t, T_ParticleReal>;
#else
// Without AMREX_USE_SIMD every "SIMD" type is the plain scalar type and the
// width is 1. The template parameters are accepted but ignored, so code
// written against the SIMD aliases compiles unchanged.
constexpr auto native_simd_size_real = 1;
constexpr auto native_simd_size_particlereal = 1;

/** Scalar stand-in: always amrex::Real, regardless of SIMD_WIDTH. */
template<int SIMD_WIDTH = native_simd_size_real>
using SIMDReal = amrex::Real;

/** Scalar stand-in: always amrex::ParticleReal, regardless of SIMD_WIDTH. */
template<int SIMD_WIDTH = native_simd_size_particlereal>
using SIMDParticleReal = amrex::ParticleReal;

// Type that has the same number of int SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDInt = int;

// Type that has the same number of IdCpu SIMD elements as the SIMDParticleReal type
template<typename T_ParticleReal = SIMDParticleReal<>>
using SIMDIdCpu = std::uint64_t;
#endif
80
namespace detail {
    /**
     * Empty tag base class. Vectorized derives from it, and is_vectorized
     * tests for it with std::is_base_of_v.
     */
    struct InternalVectorized {};
}
86
/**
 * Opt-in tag base: a type deriving from Vectorized<W> is reported as
 * vectorized by is_vectorized, because Vectorized itself derives from
 * detail::InternalVectorized.
 *
 * @tparam SIMD_WIDTH value published as simd_width; defaults to native_simd_size_real
 */
template<int SIMD_WIDTH = native_simd_size_real>
struct
Vectorized : detail::InternalVectorized
{
    /** The SIMD_WIDTH template argument, made available to users of the derived type. */
    static constexpr int simd_width = SIMD_WIDTH;
};
120
/**
 * True when T is, or derives from, detail::InternalVectorized
 * (i.e. when T inherits from some Vectorized<W>).
 *
 * @tparam T the type to inspect
 */
template<typename T>
constexpr bool is_vectorized = std::is_base_of_v<detail::InternalVectorized, T>;
127
/**
 * Report whether the n-th parameter of a free function is non-const.
 *
 * A parameter counts as non-const when its type, after removing a reference,
 * is not const-qualified. By-value parameters therefore count as non-const,
 * because top-level const is not part of a function's type.
 *
 * The function pointer is used only for its type; it is never called.
 *
 * @param n zero-based parameter index; must be smaller than the number of
 *          parameters (no range check is performed)
 * @return true if the n-th parameter is non-const
 */
template<typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(*)(Args...), int n)
{
    // one entry per parameter, in declaration order
    constexpr bool val_arr[sizeof...(Args)] {!std::is_const_v<std::remove_reference_t<Args>>...};
    return val_arr[n];
}
// same for functors (const/non-const ::operator() members)
/**
 * Report whether the n-th parameter of a non-const member function is non-const.
 *
 * A parameter counts as non-const when its type, after removing a reference,
 * is not const-qualified. The member pointer is used only for its type.
 *
 * @param n zero-based parameter index; must be smaller than the number of
 *          parameters (no range check is performed)
 * @return true if the n-th parameter is non-const
 */
template<typename C, typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(C::*)(Args...), int n)
{
    // one entry per parameter, in declaration order
    constexpr bool val_arr[sizeof...(Args)] {!std::is_const_v<std::remove_reference_t<Args>>...};
    return val_arr[n];
}
/**
 * Report whether the n-th parameter of a const member function is non-const
 * (this is the overload that matches a non-mutable lambda's operator()).
 *
 * A parameter counts as non-const when its type, after removing a reference,
 * is not const-qualified. The member pointer is used only for its type.
 *
 * @param n zero-based parameter index; must be smaller than the number of
 *          parameters (no range check is performed)
 * @return true if the n-th parameter is non-const
 */
template<typename C, typename R, typename... Args>
constexpr bool is_nth_arg_non_const (R(C::*)(Args...) const, int n)
{
    // one entry per parameter, in declaration order
    constexpr bool val_arr[sizeof...(Args)] {!std::is_const_v<std::remove_reference_t<Args>>...};
    return val_arr[n];
}
167
/**
 * Load one scalar element, or one SIMD vector of elements, from a 1D array.
 *
 * The index type selects the behaviour at compile time:
 *  - integral index: returns ptr[i];
 *  - index object with IndexType::width == 1: returns ptr[i.index];
 *  - index object with a larger width: in AMREX_USE_SIMD builds, copies
 *    IndexType::width consecutive elements starting at &ptr[i.index] into a
 *    stdx::fixed_size_simd and returns it by value; in other builds this case
 *    is rejected by a static_assert.
 *
 * In the two scalar cases the result is a reference to the array element,
 * because the return type is decltype(auto) of the subscript expression.
 *
 * @tparam T element type of the array (may be const-qualified)
 * @tparam IndexType an integral type, or a type with a static `width` and a member `index`
 * @param ptr start of the array
 * @param i position to load from
 */
template <typename T, typename IndexType>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
decltype(auto) load_1d (T* ptr, IndexType const i)
{
    if constexpr (std::is_integral_v<IndexType>) {
        return ptr[i];
    } else if constexpr (IndexType::width == 1) {
        return ptr[i.index];
    } else {
#ifdef AMREX_USE_SIMD
        // decay_t drops a const on T so the register type itself is writable
        using DataType = stdx::fixed_size_simd<std::decay_t<T>, IndexType::width>;

        // initialize vector register
        // TODO stdx::vector_aligned needs alignment guarantees
        //      https://github.com/AMReX-Codes/amrex/issues/4592
        //      https://en.cppreference.com/w/cpp/experimental/simd/simd/copy_from
        DataType val;
        val.copy_from(&ptr[i.index], stdx::element_aligned);
        return val;

#else
        static_assert(IndexType::width == 1, "SIMD width must be 1 for non-SIMD builds");
        return ptr[i.index];
#endif
    }
}
207
230 template <auto P_Method, int N, typename T, typename IndexType, typename ValType>
232 void store_1d (
233 ValType const & AMREX_RESTRICT val,
234 T * const AMREX_RESTRICT ptr,
235 IndexType const i
236 )
237 {
238 // SIMD uses special vector register types in ValType that need to be copied back to RAM array type T
239 if constexpr (!std::is_same_v<ValType, T>) {
240 if constexpr (amrex::simd::is_nth_arg_non_const(P_Method, N)) {
241#ifdef AMREX_USE_SIMD
242 // write back to memory
243 // TODO stdx::vector_aligned needs alignment guarantees
244 // https://github.com/AMReX-Codes/amrex/issues/4592
245 // https://en.cppreference.com/w/cpp/experimental/simd/simd/copy_from
246 val.copy_to(&ptr[i.index], amrex::simd::stdx::element_aligned);
247#else
248 amrex::Abort("store_1d: ValType val must alias T ptr data (to make this a no-OP).");
249#endif
250 }
251 }
252 amrex::ignore_unused(val, ptr, i);
253 }
254
255} // namespace amrex::simd
256
257#endif
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:119
#define AMREX_RESTRICT
Definition AMReX_Extension.H:32
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20
amrex_real Real
Floating Point Type for Fields.
Definition AMReX_REAL.H:79
amrex_particle_real ParticleReal
Floating Point Type for Particles.
Definition AMReX_REAL.H:90
__host__ __device__ bool any_of(bool const v)
Definition AMReX_SIMD.H:35
Definition AMReX_SIMD.H:22
std::uint64_t SIMDIdCpu
Definition AMReX_SIMD.H:78
__host__ __device__ void store_1d(ValType const &__restrict__ val, T *const __restrict__ ptr, IndexType const i)
Definition AMReX_SIMD.H:232
constexpr auto native_simd_size_real
Definition AMReX_SIMD.H:63
int SIMDInt
Definition AMReX_SIMD.H:74
constexpr bool is_vectorized
Definition AMReX_SIMD.H:126
constexpr bool is_nth_arg_non_const(R(*)(Args...), int n)
Definition AMReX_SIMD.H:149
amrex::ParticleReal SIMDParticleReal
Definition AMReX_SIMD.H:70
__host__ __device__ decltype(auto) load_1d(T *ptr, IndexType const i)
Definition AMReX_SIMD.H:183
constexpr auto native_simd_size_particlereal
Definition AMReX_SIMD.H:64
amrex::Real SIMDReal
Definition AMReX_SIMD.H:67
__host__ __device__ void ignore_unused(const Ts &...)
This shuts up the compiler about unused variables.
Definition AMReX.H:139
void Abort(const std::string &msg)
Print out message to cerr and exit via abort().
Definition AMReX.cpp:230
const int[]
Definition AMReX_BLProfiler.cpp:1664
Definition AMReX_SIMD.H:116