docs_html/doxygen/AMReX__AlgVecUtil_8H_source.html

#ifndef AMREX_ALG_VEC_UTIL_H_

#define AMREX_ALG_VEC_UTIL_H_

#include <AMReX_Config.H>


#include <AMReX_AlgVector.H>


namespace amrex {


/**
 * \brief Compile-time predicate: is the queried type an AlgVector?
 *
 * This is the primary template. It derives from std::false_type, so the
 * answer is `false` for any type not picked up by a specialization.
 *
 * \tparam T type being queried
 */
template <typename T>
struct IsAlgVector
    : std::false_type
{};

//

/**
 * \brief IsAlgVector specialization that yields `true`.
 *
 * Selected only when Vec is exactly
 * AlgVector<Vec::value_type, Vec::allocator_type>. A type that lacks either
 * nested name does not satisfy the constraint and falls back to the primary
 * template.
 *
 * \tparam Vec candidate vector type
 */
template <class Vec>
requires (std::same_as<AlgVector<typename Vec::value_type, typename Vec::allocator_type>, Vec>)
struct IsAlgVector<Vec>
    : std::true_type
{};


/**
 * \brief Invoke a callable on every locally stored entry of one AlgVector.
 *
 * The iteration over the local rows is handed to ParallelForOMP; for each
 * local index i the callable is invoked as f(x[i]).
 *
 * \tparam V1 vector type; std::decay_t<V1> must satisfy IsAlgVector
 * \tparam F  callable type (captured by value in the kernel lambda)
 * \param x vector whose local entries are visited
 * \param f callable accepting one entry of \p x
 */
template <typename V1, typename F>
requires (IsAlgVector<std::decay_t<V1> >::value)
void
ForEach (V1 & x, F const& f)
{
    Long const nrows = x.numLocalRows();
    auto* xdata = x.data();
    ParallelForOMP(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
    {
        f(xdata[irow]);
    });
}


/**
 * \brief Invoke a callable on matching local entries of two AlgVectors.
 *
 * For each local index i the callable is invoked as f(x[i], y[i]); the loop
 * is handed to ParallelForOMP. Both vectors must have the same number of
 * local rows (checked with AMREX_ASSERT).
 *
 * NOTE(review): the data pointers are declared AMREX_RESTRICT, so passing the
 * same vector for both arguments presumably breaks the no-alias promise on
 * builds where that macro is non-empty -- confirm before doing so.
 *
 * \tparam V1,V2 vector types; each must satisfy IsAlgVector after std::decay_t
 * \tparam F     callable type (captured by value in the kernel lambda)
 * \param x first vector
 * \param y second vector
 * \param f callable accepting one entry of \p x and one of \p y
 */
template <typename V1, typename V2, typename F>
requires (IsAlgVector<std::decay_t<V1> >::value &&
          IsAlgVector<std::decay_t<V2> >::value)
void
ForEach (V1 & x, V2 & y, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long const nrows = x.numLocalRows();
    auto* AMREX_RESTRICT xdata = x.data();
    auto* AMREX_RESTRICT ydata = y.data();
    ParallelForOMP(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
    {
        f(xdata[irow], ydata[irow]);
    });
}


/**
 * \brief Invoke a callable on matching local entries of three AlgVectors.
 *
 * For each local index i the callable is invoked as f(x[i], y[i], z[i]); the
 * loop is handed to ParallelForOMP. All vectors must have the same number of
 * local rows as \p x (checked with AMREX_ASSERT).
 *
 * NOTE(review): the data pointers are declared AMREX_RESTRICT, so passing the
 * same vector for more than one argument presumably breaks the no-alias
 * promise on builds where that macro is non-empty -- confirm before doing so.
 *
 * \tparam V1,V2,V3 vector types; each must satisfy IsAlgVector after std::decay_t
 * \tparam F        callable type (captured by value in the kernel lambda)
 * \param x first vector
 * \param y second vector
 * \param z third vector
 * \param f callable accepting one entry from each vector, in argument order
 */
template <typename V1, typename V2, typename V3, typename F>
requires (IsAlgVector<std::decay_t<V1> >::value &&
          IsAlgVector<std::decay_t<V2> >::value &&
          IsAlgVector<std::decay_t<V3> >::value)
void
ForEach (V1 & x, V2 & y, V3 & z, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    Long const nrows = x.numLocalRows();
    auto* AMREX_RESTRICT xdata = x.data();
    auto* AMREX_RESTRICT ydata = y.data();
    auto* AMREX_RESTRICT zdata = z.data();
    ParallelForOMP(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
    {
        f(xdata[irow], ydata[irow], zdata[irow]);
    });
}


/**
 * \brief Invoke a callable on matching local entries of four AlgVectors.
 *
 * For each local index i the callable is invoked as
 * f(x[i], y[i], z[i], a[i]); the loop is handed to ParallelForOMP. All
 * vectors must have the same number of local rows as \p x (checked with
 * AMREX_ASSERT).
 *
 * NOTE(review): the data pointers are declared AMREX_RESTRICT, so passing the
 * same vector for more than one argument presumably breaks the no-alias
 * promise on builds where that macro is non-empty -- confirm before doing so.
 *
 * \tparam V1,V2,V3,V4 vector types; each must satisfy IsAlgVector after std::decay_t
 * \tparam F           callable type (captured by value in the kernel lambda)
 * \param x first vector
 * \param y second vector
 * \param z third vector
 * \param a fourth vector
 * \param f callable accepting one entry from each vector, in argument order
 */
template <typename V1, typename V2, typename V3, typename V4, typename F>
requires (IsAlgVector<std::decay_t<V1> >::value &&
          IsAlgVector<std::decay_t<V2> >::value &&
          IsAlgVector<std::decay_t<V3> >::value &&
          IsAlgVector<std::decay_t<V4> >::value)
void
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    Long const nrows = x.numLocalRows();
    auto* AMREX_RESTRICT xdata = x.data();
    auto* AMREX_RESTRICT ydata = y.data();
    auto* AMREX_RESTRICT zdata = z.data();
    auto* AMREX_RESTRICT adata = a.data();
    ParallelForOMP(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
    {
        f(xdata[irow], ydata[irow], zdata[irow], adata[irow]);
    });
}


/**
 * \brief Invoke a callable on matching local entries of five AlgVectors.
 *
 * For each local index i the callable is invoked as
 * f(x[i], y[i], z[i], a[i], b[i]); the loop is handed to ParallelForOMP. All
 * vectors must have the same number of local rows as \p x (checked with
 * AMREX_ASSERT).
 *
 * NOTE(review): the data pointers are declared AMREX_RESTRICT, so passing the
 * same vector for more than one argument presumably breaks the no-alias
 * promise on builds where that macro is non-empty -- confirm before doing so.
 *
 * \tparam V1,V2,V3,V4,V5 vector types; each must satisfy IsAlgVector after std::decay_t
 * \tparam F              callable type (captured by value in the kernel lambda)
 * \param x first vector
 * \param y second vector
 * \param z third vector
 * \param a fourth vector
 * \param b fifth vector
 * \param f callable accepting one entry from each vector, in argument order
 */
template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
requires (IsAlgVector<std::decay_t<V1> >::value &&
          IsAlgVector<std::decay_t<V2> >::value &&
          IsAlgVector<std::decay_t<V3> >::value &&
          IsAlgVector<std::decay_t<V4> >::value &&
          IsAlgVector<std::decay_t<V5> >::value)
void
ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == z.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == a.numLocalRows());
    AMREX_ASSERT(x.numLocalRows() == b.numLocalRows());
    Long const nrows = x.numLocalRows();
    auto* AMREX_RESTRICT xdata = x.data();
    auto* AMREX_RESTRICT ydata = y.data();
    auto* AMREX_RESTRICT zdata = z.data();
    auto* AMREX_RESTRICT adata = a.data();
    auto* AMREX_RESTRICT bdata = b.data();
    ParallelForOMP(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
    {
        f(xdata[irow], ydata[irow], zdata[irow], adata[irow], bdata[irow]);
    });
}


/**
 * \brief Dot product of two AlgVectors.
 *
 * Sums x[i]*y[i] over the locally stored rows with Reduce::Sum. Unless
 * \p local is true, the partial sum is then combined across
 * ParallelContext::CommunicatorSub() with ParallelAllReduce::Sum.
 *
 * \param x     first vector
 * \param y     second vector; must have as many local rows as \p x
 *              (checked with AMREX_ASSERT)
 * \param local if true, skip the cross-process reduction and return only the
 *              sum over the local rows
 * \return the dot product (or its local contribution when \p local is true)
 */
template <typename T, typename Allocator>
T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
{
    AMREX_ASSERT(x.numLocalRows() == y.numLocalRows());
    Long const nrows = x.numLocalRows();
    auto const* xdata = x.data();
    auto const* ydata = y.data();
    T result = Reduce::Sum<T>(nrows, [=] AMREX_GPU_DEVICE (Long irow) noexcept
                              {
                                  return xdata[irow] * ydata[irow];
                              });
    if (local) {
        // Caller asked for the local contribution only.
        return result;
    }
    ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub());
    return result;
}


/**
 * \brief y = a*x + y, applied entry by entry to the local rows.
 *
 * For GPU builds this is asynchronous with respect to the host.
 *
 * \param y vector updated in place
 * \param a scalar multiplying \p x
 * \param x vector that is scaled and added to \p y
 */
template <typename T, typename Allocator>
void Axpy (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
    ForEach(y, x,
            [=] AMREX_GPU_DEVICE (T& y_entry, T const& x_entry)
            {
                y_entry += a*x_entry;
            });
}


/**
 * \brief y = a*y + x, applied entry by entry to the local rows.
 *
 * \param y vector that is scaled by \p a and updated in place
 * \param a scalar multiplying the old value of \p y
 * \param x vector added to the scaled \p y
 */
template <typename T, typename Allocator>
void Xpay (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
    ForEach(y, x,
            [=] AMREX_GPU_DEVICE (T& y_entry, T const& x_entry)
            {
                y_entry = a*y_entry + x_entry;
            });
}


/**
 * \brief y = a*xa + b*xb, applied entry by entry to the local rows.
 *
 * The previous contents of \p y are overwritten.
 *
 * \param y  destination vector
 * \param a  scalar multiplying \p xa
 * \param xa first source vector
 * \param b  scalar multiplying \p xb
 * \param xb second source vector
 */
template <typename T, typename Allocator>
void LinComb (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& xa,
              T b, AlgVector<T,Allocator> const& xb)
{
    ForEach(y, xa, xb,
            [=] AMREX_GPU_DEVICE (T& y_entry, T const& xa_entry, T const& xb_entry)
            {
                y_entry = a*xa_entry + b*xb_entry;
            });
}


}


#endif

AMReX_AlgVector.H

AMREX_ASSERT
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38

AMREX_RESTRICT
#define AMREX_RESTRICT
Definition AMReX_Extension.H:37

AMREX_GPU_DEVICE
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18

amrex::AlgVector
Distributed dense vector that mirrors the layout of an AlgPartition.
Definition AMReX_AlgVector.H:29

amrex::Long
amrex_long Long
Definition AMReX_INT.H:30

amrex::ParallelForOMP
void ParallelForOMP(T n, L const &f) noexcept
Performance-portable kernel launch function with optional OpenMP threading.
Definition AMReX_GpuLaunch.H:328

amrex::ParallelAllReduce::Sum
void Sum(Gpu::DeviceVector< T > &v, MPI_Comm comm)
Definition AMReX_GpuParallelReduce.H:34

amrex::ParallelContext::CommunicatorSub
MPI_Comm CommunicatorSub() noexcept
sub-communicator for current frame
Definition AMReX_ParallelContext.H:70

amrex
Definition AMReX_Amr.cpp:50

amrex::Order::F
@ F

amrex::Dot
FAB::value_type Dot(FabArray< FAB > const &x, int xcomp, FabArray< FAB > const &y, int ycomp, int ncomp, IntVect const &nghost, bool local=false)
Compute dot products of two FabArrays.
Definition AMReX_FabArrayUtility.H:1609

amrex::LinComb
void LinComb(MF &dst, typename MF::value_type a, MF const &src_a, int acomp, typename MF::value_type b, MF const &src_b, int bcomp, int dcomp, int ncomp, IntVect const &nghost)
dst = a*src_a + b*src_b
Definition AMReX_FabArrayUtility.H:1941

amrex::Direction::y
@ y

amrex::Direction::x
@ x

amrex::Direction::z
@ z

amrex::ForEach
constexpr void ForEach(TypeList< Ts... >, F &&f)
For each type t in TypeList, call f(t)
Definition AMReX_TypeList.H:83

amrex::Xpay
void Xpay(MF &dst, typename MF::value_type a, MF const &src, int scomp, int dcomp, int ncomp, IntVect const &nghost)
dst = src + a * dst
Definition AMReX_FabArrayUtility.H:1906

amrex::Axpy
void Axpy(AlgVector< T, Allocator > &y, T a, AlgVector< T, Allocator > const &x)
y = ax + y. For GPU builds this is asynchronous with respect to the host.
Definition AMReX_AlgVecUtil.H:188

amrex::IsAlgVector
Definition AMReX_AlgVecUtil.H:16