docs_html/doxygen/AMReX__ParticleTransformation_8H_source.html

#ifndef AMREX_PARTICLETRANSFORMATION_H_

#define AMREX_PARTICLETRANSFORMATION_H_

#include <AMReX_Config.H>


#include <AMReX_IntVect.H>

#include <AMReX_Box.H>

#include <AMReX_Gpu.H>

#include <AMReX_Print.H>

#include <AMReX_ParticleTile.H>

#include <AMReX_ParticleTileRT.H>

#include <AMReX_ParticleUtil.H>


namespace amrex

{


/**
 * \brief Copy a single particle from a read-only tile view into a writable one.
 *
 * Copies the AoS struct (or, for pure SoA particles, the idcpu entry), then
 * every compile-time real/int component and every runtime real/int component.
 *
 * \param dst   destination tile data
 * \param src   source tile data (const view)
 * \param src_i index of the particle to read in src
 * \param dst_i index of the slot to write in dst
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void copyParticle (const      ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ConstParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    // Particle "header": idcpu for SoA particles, the whole struct otherwise.
    if constexpr (T_ParticleType::is_soa_particle) {
        dst.m_idcpu[dst_i] = src.m_idcpu[src_i];
    } else {
        dst.m_aos[dst_i] = src.m_aos[src_i];
    }

    // Compile-time real components.
    if constexpr (NAR > 0) {
        for (int comp = 0; comp < NAR; ++comp) {
            dst.m_rdata[comp][dst_i] = src.m_rdata[comp][src_i];
        }
    }

    // Runtime-added real components.
    auto const n_rt_real = dst.m_num_runtime_real;
    for (int comp = 0; comp < n_rt_real; ++comp) {
        dst.m_runtime_rdata[comp][dst_i] = src.m_runtime_rdata[comp][src_i];
    }

    // Compile-time int components.
    if constexpr (NAI > 0) {
        for (int comp = 0; comp < NAI; ++comp) {
            dst.m_idata[comp][dst_i] = src.m_idata[comp][src_i];
        }
    }

    // Runtime-added int components.
    auto const n_rt_int = dst.m_num_runtime_int;
    for (int comp = 0; comp < n_rt_int; ++comp) {
        dst.m_runtime_idata[comp][dst_i] = src.m_runtime_idata[comp][src_i];
    }
}


/**
 * \brief Copy a single particle between two writable tile views.
 *
 * Same component-by-component copy as the overload taking a
 * ConstParticleTileData source, but src is a (mutable) ParticleTileData.
 *
 * \param dst   destination tile data
 * \param src   source tile data
 * \param src_i index of the particle to read in src
 * \param dst_i index of the slot to write in dst
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void copyParticle (const ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    // Particle "header": the whole struct for AoS particles, idcpu for SoA.
    if constexpr (!T_ParticleType::is_soa_particle) {
        dst.m_aos[dst_i] = src.m_aos[src_i];
    } else {
        dst.m_idcpu[dst_i] = src.m_idcpu[src_i];
    }

    // Real data: compile-time components first, then runtime components.
    for (int comp = 0; comp < NAR; ++comp) {
        dst.m_rdata[comp][dst_i] = src.m_rdata[comp][src_i];
    }
    auto const n_rt_real = dst.m_num_runtime_real;
    for (int comp = 0; comp < n_rt_real; ++comp) {
        dst.m_runtime_rdata[comp][dst_i] = src.m_runtime_rdata[comp][src_i];
    }

    // Int data: compile-time components first, then runtime components.
    for (int comp = 0; comp < NAI; ++comp) {
        dst.m_idata[comp][dst_i] = src.m_idata[comp][src_i];
    }
    auto const n_rt_int = dst.m_num_runtime_int;
    for (int comp = 0; comp < n_rt_int; ++comp) {
        dst.m_runtime_idata[comp][dst_i] = src.m_runtime_idata[comp][src_i];
    }
}


/**
 * \brief Exchange two particles, component by component, via amrex::Swap.
 *
 * Both tile views are written to: after the call, slot dst_i of dst holds
 * what was in slot src_i of src and vice versa.
 *
 * \param dst   first tile data
 * \param src   second tile data
 * \param src_i particle index in src
 * \param dst_i particle index in dst
 */
template <typename T_ParticleType, int NAR, int NAI>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void swapParticle (const ParticleTileData<T_ParticleType, NAR, NAI>& dst,
                   const ParticleTileData<T_ParticleType, NAR, NAI>& src,
                   int src_i, int dst_i) noexcept
{
    AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real);
    AMREX_ASSERT(dst.m_num_runtime_int  == src.m_num_runtime_int );

    // Particle "header": the whole struct for AoS particles, idcpu for SoA.
    if constexpr (!T_ParticleType::is_soa_particle) {
        amrex::Swap(dst.m_aos[dst_i], src.m_aos[src_i]);
    } else {
        amrex::Swap(dst.m_idcpu[dst_i], src.m_idcpu[src_i]);
    }

    // Compile-time real components.
    if constexpr (NAR > 0) {
        for (int comp = 0; comp < NAR; ++comp) {
            amrex::Swap(dst.m_rdata[comp][dst_i], src.m_rdata[comp][src_i]);
        }
    }

    // Runtime-added real components.
    auto const n_rt_real = dst.m_num_runtime_real;
    for (int comp = 0; comp < n_rt_real; ++comp) {
        amrex::Swap(dst.m_runtime_rdata[comp][dst_i], src.m_runtime_rdata[comp][src_i]);
    }

    // Compile-time int components.
    if constexpr (NAI > 0) {
        for (int comp = 0; comp < NAI; ++comp) {
            amrex::Swap(dst.m_idata[comp][dst_i], src.m_idata[comp][src_i]);
        }
    }

    // Runtime-added int components.
    auto const n_rt_int = dst.m_num_runtime_int;
    for (int comp = 0; comp < n_rt_int; ++comp) {
        amrex::Swap(dst.m_runtime_idata[comp][dst_i], src.m_runtime_idata[comp][src_i]);
    }
}


/**
 * \brief Copy a single particle between two ParticleTileDataRT views.
 *
 * Copies the idcpu entry, then m_n_real real components and m_n_int int
 * components. Source and destination may use different real/int element
 * types (SRType/SIType vs. DRType/DIType).
 *
 * \param dst   destination tile data
 * \param src   source tile data
 * \param src_i index of the particle to read in src
 * \param dst_i index of the slot to write in dst
 */
template <class DRType, class DIType, class SRType, class SIType>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void copyParticle (const ParticleTileDataRT<DRType, DIType>& dst,
                   const ParticleTileDataRT<SRType, SIType>& src,
                   typename ParticleTileDataRT<SRType, SIType>::size_type src_i,
                   typename ParticleTileDataRT<DRType, DIType>::size_type dst_i) noexcept
{
    AMREX_ASSERT(dst.m_n_real == src.m_n_real);
    AMREX_ASSERT(dst.m_n_int == src.m_n_int);

    dst.idcpu(dst_i) = src.idcpu(src_i);

    // Real components.
    auto const n_real = dst.m_n_real;
    for (int comp = 0; comp < n_real; ++comp) {
        dst.rdata(comp)[dst_i] = src.rdata(comp)[src_i];
    }

    // Int components.
    auto const n_int = dst.m_n_int;
    for (int comp = 0; comp < n_int; ++comp) {
        dst.idata(comp)[dst_i] = src.idata(comp)[src_i];
    }
}


/**
 * \brief Exchange two particles held in ParticleTileDataRT views.
 *
 * Swaps the idcpu entry, then m_n_real real components and m_n_int int
 * components, each through amrex::Swap.
 *
 * \param dst   first tile data
 * \param src   second tile data
 * \param src_i particle index in src
 * \param dst_i particle index in dst
 */
template <class DRType, class DIType, class SRType, class SIType>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void swapParticle (const ParticleTileDataRT<DRType, DIType>& dst,
                   const ParticleTileDataRT<SRType, SIType>& src,
                   typename ParticleTileDataRT<SRType, SIType>::size_type src_i,
                   typename ParticleTileDataRT<DRType, DIType>::size_type dst_i) noexcept
{
    AMREX_ASSERT(dst.m_n_real == src.m_n_real);
    AMREX_ASSERT(dst.m_n_int == src.m_n_int);

    amrex::Swap(dst.idcpu(dst_i), src.idcpu(src_i));

    // Real components.
    auto const n_real = dst.m_n_real;
    for (int comp = 0; comp < n_real; ++comp) {
        amrex::Swap(dst.rdata(comp)[dst_i], src.rdata(comp)[src_i]);
    }

    // Int components.
    auto const n_int = dst.m_n_int;
    for (int comp = 0; comp < n_int; ++comp) {
        amrex::Swap(dst.idata(comp)[dst_i], src.idata(comp)[src_i]);
    }
}


/**
 * \brief Copy every particle of src into dst, starting at index 0 of both.
 *
 * Thin wrapper over the ranged overload with both start offsets set to 0 and
 * the count set to src.numParticles().
 *
 * \param dst destination tile
 * \param src source tile
 */
template <typename DstTile, typename SrcTile>
void copyParticles (DstTile& dst, const SrcTile& src) noexcept
{
    auto num_src = src.numParticles();
    const int first = 0;  // start offset used for both source and destination
    copyParticles(dst, src, first, first, num_src);
}


template <typename DstTile, typename SrcTile, std::integral Index, typename N>


void copyParticles (DstTile& dst, const SrcTile& src,

                    Index src_start, Index dst_start, N n) noexcept

{

    const auto src_data = src.getConstParticleTileData();

          auto dst_data = dst.getParticleTileData();


    AMREX_HOST_DEVICE_FOR_1D( n, i,

    {

        copyParticle(dst_data, src_data, src_start+i, dst_start+i);

    });


    Gpu::streamSynchronize();

}


/**
 * \brief Apply f to every particle of src, writing into dst from index 0.
 *
 * Thin wrapper over the ranged overload: both start offsets are zero (typed
 * like src.numParticles()) and the count is src.numParticles().
 *
 * \param dst destination tile
 * \param src source tile
 * \param f   callable invoked as f(dst_data, src_data, src_index, dst_index)
 */
template <typename DstTile, typename SrcTile, typename F>
void transformParticles (DstTile& dst, const SrcTile& src, F&& f) noexcept
{
    auto num_src = src.numParticles();
    using Index = decltype(num_src);
    Index const first{0};  // start offset for both src and dst
    transformParticles(dst, src, first, first, num_src, std::forward<F>(f));
}


template <typename DstTile, typename SrcTile, std::integral Index, typename N, typename F>


void transformParticles (DstTile& dst, const SrcTile& src,

                         Index src_start, Index dst_start, N n, F const& f) noexcept

{

    const auto src_data = src.getConstParticleTileData();

          auto dst_data = dst.getParticleTileData();


    AMREX_HOST_DEVICE_FOR_1D( n, i,

    {

        f(dst_data, src_data, src_start+i, dst_start+i);

    });


    Gpu::streamSynchronize();

}


/**
 * \brief Apply f to every particle of src, writing into two destination tiles.
 *
 * Thin wrapper over the ranged two-destination overload: all three start
 * offsets are zero and the count is src.numParticles().
 *
 * \param dst1 first destination tile
 * \param dst2 second destination tile
 * \param src  source tile
 * \param f    callable invoked as
 *             f(dst1_data, dst2_data, src_data, src_index, dst1_index, dst2_index)
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, typename F>
void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src, F&& f) noexcept
{
    auto num_src = src.numParticles();
    using Index = decltype(num_src);
    Index const first{0};  // start offset for src, dst1 and dst2
    transformParticles(dst1, dst2, src, first, first, first, num_src, std::forward<F>(f));
}


template <typename DstTile1, typename DstTile2, typename SrcTile,

          std::integral Index, typename N, typename F>


void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,

                         Index src_start, Index dst1_start, Index dst2_start, N n, F const& f) noexcept

{

    const auto src_data  = src.getConstParticleTileData();

          auto dst1_data = dst1.getParticleTileData();

          auto dst2_data = dst2.getParticleTileData();


    AMREX_HOST_DEVICE_FOR_1D( n, i,

    {

        f(dst1_data, dst2_data, src_data, src_start+i, dst1_start+i, dst2_start+i);

    });


    Gpu::streamSynchronize();

}


/**
 * \brief Conditionally copy all particles of src into dst based on mask.
 *
 * Thin wrapper over the ranged mask overload with both start offsets set to
 * zero and the count set to src.numParticles().
 *
 * The template parameter N does not appear in the function signature and so
 * can never be deduced; it is kept (with a default) only so that callers who
 * spell out all four template arguments keep compiling. Without the default
 * this overload could not be selected by an ordinary call.
 *
 * \param dst  destination tile
 * \param src  source tile
 * \param mask one entry per source particle; non-zero entries are copied
 * \return whatever the ranged overload returns (the number of particles
 *         copied when mask holds 0/1 values)
 */
template <typename DstTile, typename SrcTile, std::integral Index, typename N = Index>
Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask) noexcept
{
    return filterParticles(dst, src, mask, Index{0}, Index{0}, src.numParticles());
}


/**
 * \brief Conditionally copy n particles from src to dst based on mask.
 *
 * An exclusive scan of mask gives, for each selected particle, its slot
 * relative to dst_start, so selected particles end up packed contiguously in
 * dst. Particle src_start+i is copied when mask[i] is non-zero.
 *
 * \param dst       destination tile
 * \param src       source tile
 * \param mask      n entries; it is read by Gpu::exclusive_scan, by a
 *                  Gpu::deviceToHost copy and inside the copy kernel
 *                  (presumably device-accessible memory -- confirm with callers)
 * \param src_start offset of the first candidate particle in src
 * \param dst_start offset of the first slot to write in dst
 * \param n         number of candidate particles
 * \return last mask entry plus last scan offset, i.e. the sum of the mask
 *         (the number of particles copied when mask holds 0/1 values);
 *         0 when n is not positive
 */
template <typename DstTile, typename SrcTile, std::integral Index, typename N>
Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask,
                       Index src_start, Index dst_start, N n) noexcept
{
    // Nothing to filter. Returning here also avoids reading mask[n-1] and
    // offsets[n-1] below, which would be out of bounds for an empty range.
    if (n <= 0) { return Index{0}; }

    Gpu::DeviceVector<Index> offsets(n);
    Gpu::exclusive_scan(mask, mask+n, offsets.begin());

    // The total count is (last offset) + (last mask value); fetch both to the host.
    Index last_mask = 0;
    Index last_offset = 0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+n-1, mask + n, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+n-1, offsets.data()+n, &last_offset);

    auto* p_offsets = offsets.dataPtr();

    const auto src_data = src.getConstParticleTileData();
          auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( n, i,
    {
        if (mask[i]) { copyParticle(dst_data, src_data, src_start+i, dst_start+p_offsets[i]); }
    });

    // Synchronize before last_mask/last_offset are read and before offsets is destroyed.
    Gpu::streamSynchronize();
    return last_mask + last_offset;
}


/**
 * \brief Conditionally copy all particles of src into dst based on a predicate.
 *
 * Thin wrapper over the ranged predicate overload with both start offsets set
 * to 0 and the count set to src.numParticles(). Excluded from overload
 * resolution when Pred is a pointer, so mask pointers pick the mask overloads.
 *
 * \param dst destination tile
 * \param src source tile
 * \param p   predicate deciding which particles are copied
 * \return the value returned by the ranged overload, as int
 */
template <typename DstTile, typename SrcTile, typename Pred>
requires (!std::is_pointer_v<std::decay_t<Pred>>)
int filterParticles (DstTile& dst, const SrcTile& src, Pred&& p) noexcept
{
    const int first = 0;  // start offset used for both source and destination
    return filterParticles(dst, src, std::forward<Pred>(p), first, first, src.numParticles());
}


/**
 * \brief Conditionally copy n particles from src to dst based on a predicate.
 *
 * First evaluates p for every candidate particle into a temporary mask using
 * amrex::ParallelForRNG, then hands that mask to the mask-based overload.
 * The predicate is called as p(src_data, index, engine) when it is callable
 * with a RandomEngine, and as p(src_data, index) otherwise.
 *
 * \param dst       destination tile
 * \param src       source tile
 * \param p         predicate deciding which particles are copied
 * \param src_start offset of the first candidate particle in src
 * \param dst_start offset of the first slot to write in dst
 * \param n         number of candidate particles
 * \return the value returned by the mask-based overload
 */
template <typename DstTile, typename SrcTile, typename Pred, typename Index, typename N>
requires (!std::is_pointer_v<std::decay_t<Pred>>)
Index filterParticles (DstTile& dst, const SrcTile& src, Pred const& p,
                       Index src_start, Index dst_start, N n) noexcept
{
    Gpu::DeviceVector<Index> keep(n);
    auto* keep_ptr = keep.dataPtr();

    const auto src_ptd = src.getConstParticleTileData();

    amrex::ParallelForRNG(n,
    [p, keep_ptr, src_ptd, src_start] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, keep_ptr, src_ptd, src_start, engine);
        // Pick the predicate signature at compile time.
        if constexpr (!IsCallable<Pred,decltype(src_ptd),Index,RandomEngine>::value) {
            keep_ptr[i] = p(src_ptd, src_start+i);
        } else {
            keep_ptr[i] = p(src_ptd, src_start+i, engine);
        }
    });

    return filterParticles(dst, src, keep_ptr, src_start, dst_start, n);
}


/**
 * \brief Conditionally transform particles from src into dst based on mask.
 *
 * An exclusive scan of mask gives each selected particle its slot relative to
 * dst_start. For every i in [0, src.numParticles()) with mask[i] non-zero,
 * f(dst_data, src_data, src_start+i, dst_start+offset[i]) is invoked.
 *
 * NOTE(review): the loop always covers src.numParticles() entries, even when
 * src_start is non-zero; callers presumably guarantee that src_start+i stays
 * in range -- confirm.
 *
 * \param dst       destination tile
 * \param src       source tile
 * \param mask      src.numParticles() entries; read by Gpu::exclusive_scan,
 *                  by a Gpu::deviceToHost copy and inside the kernel
 * \param f         callable performing the per-particle transformation
 * \param src_start offset added to the source index passed to f
 * \param dst_start offset added to the destination index passed to f
 * \return last mask entry plus last scan offset, i.e. the sum of the mask
 *         (the number of particles written when mask holds 0/1 values);
 *         0 when src has no particles
 */
template <typename DstTile, typename SrcTile, std::integral Index, typename F>
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F const& f,
                                   Index src_start, Index dst_start) noexcept
{
    auto np = src.numParticles();

    // Nothing to do. Returning here also avoids reading mask[np-1] and
    // offsets[np-1] below, which would be out of bounds for an empty tile.
    if (np <= 0) { return Index{0}; }

    Gpu::DeviceVector<Index> offsets(np);
    Gpu::exclusive_scan(mask, mask+np, offsets.begin());

    // The total count is (last offset) + (last mask value); fetch both to the host.
    Index last_mask = 0;
    Index last_offset = 0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+np-1, mask + np, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+np-1, offsets.data()+np, &last_offset);

    auto const* p_offsets = offsets.dataPtr();

    const auto src_data = src.getConstParticleTileData();
          auto dst_data = dst.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        if (mask[i]) {
            f(dst_data, src_data, src_start+i,
              dst_start+p_offsets[i]);
        }
    });

    // Synchronize before last_mask/last_offset are read and before offsets is destroyed.
    Gpu::streamSynchronize();
    return last_mask + last_offset;
}


/**
 * \brief Conditionally transform all particles of src into dst based on mask.
 *
 * Thin wrapper over the overload taking explicit start offsets, with both
 * offsets set to zero.
 *
 * \param dst  destination tile
 * \param src  source tile
 * \param mask one entry per source particle; non-zero entries are processed
 * \param f    callable performing the per-particle transformation
 * \return the value returned by the offset-taking overload
 */
template <typename DstTile, typename SrcTile, std::integral Index, typename F>
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F&& f) noexcept
{
    Index const first{0};  // start offset used for both source and destination
    return filterAndTransformParticles(dst, src, mask, std::forward<F>(f), first, first);
}


/**
 * \brief Conditionally transform all particles of src into dst based on a
 *        predicate.
 *
 * Thin wrapper over the predicate overload taking explicit start offsets;
 * both offsets are zero, typed like src.numParticles(). Excluded from
 * overload resolution when Pred is a pointer.
 *
 * \param dst destination tile
 * \param src source tile
 * \param p   predicate deciding which particles are processed
 * \param f   callable performing the per-particle transformation
 * \return the value returned by the offset-taking overload, as int
 */
template <typename DstTile, typename SrcTile, typename Pred, typename F>
requires (!std::is_pointer_v<std::decay_t<Pred>>)
int filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f) noexcept
{
    using Index = decltype(src.numParticles());
    Index const first{0};  // start offset used for both source and destination
    return filterAndTransformParticles(dst, src, std::forward<Pred>(p), std::forward<F>(f),
                                       first, first);
}


/**
 * \brief Conditionally transform particles from src into two destination
 *        tiles based on mask.
 *
 * An exclusive scan of mask gives each selected particle its output slot.
 * For every i in [0, src.numParticles()) with mask[i] non-zero,
 * f(dst1_data, dst2_data, src_data, i, offset[i], offset[i]) is invoked, so
 * both destinations are written at the same packed index.
 *
 * \param dst1 first destination tile
 * \param dst2 second destination tile
 * \param src  source tile
 * \param mask src.numParticles() entries; read by Gpu::exclusive_scan, by a
 *             Gpu::deviceToHost copy and inside the kernel
 * \param f    callable performing the per-particle transformation
 * \return last mask entry plus last scan offset, i.e. the sum of the mask
 *         (the number of particles written when mask holds 0/1 values);
 *         0 when src has no particles
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, std::integral Index, typename F>
Index filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2,
                                   const SrcTile& src, Index* mask, F const& f) noexcept
{
    auto np = src.numParticles();

    // Nothing to do. Returning here also avoids reading mask[np-1] and
    // offsets[np-1] below, which would be out of bounds for an empty tile.
    if (np <= 0) { return Index{0}; }

    Gpu::DeviceVector<Index> offsets(np);
    Gpu::exclusive_scan(mask, mask+np, offsets.begin());

    // The total count is (last offset) + (last mask value); fetch both to the host.
    Index last_mask = 0;
    Index last_offset = 0;
    Gpu::copyAsync(Gpu::deviceToHost, mask+np-1, mask + np, &last_mask);
    Gpu::copyAsync(Gpu::deviceToHost, offsets.data()+np-1, offsets.data()+np, &last_offset);

    auto* p_offsets = offsets.dataPtr();

    const auto src_data  = src.getConstParticleTileData();
          auto dst_data1 = dst1.getParticleTileData();
          auto dst_data2 = dst2.getParticleTileData();

    AMREX_HOST_DEVICE_FOR_1D( np, i,
    {
        if (mask[i]) { f(dst_data1, dst_data2, src_data, i, p_offsets[i], p_offsets[i]); }
    });

    // Synchronize before last_mask/last_offset are read and before offsets is destroyed.
    Gpu::streamSynchronize();
    return last_mask + last_offset;
}


/**
 * \brief Conditionally transform particles from src into two destination
 *        tiles based on a predicate.
 *
 * Evaluates p for every particle of src into a temporary int mask using
 * amrex::ParallelForRNG, then forwards to the two-destination mask overload.
 * The predicate is called as p(src_data, i, engine) when it is callable with
 * a RandomEngine, and as p(src_data, i) otherwise.
 *
 * \param dst1 first destination tile
 * \param dst2 second destination tile
 * \param src  source tile
 * \param p    predicate deciding which particles are processed
 * \param f    callable performing the per-particle transformation
 * \return the value returned by the mask-based overload
 */
template <typename DstTile1, typename DstTile2, typename SrcTile, typename Pred, typename F>
requires (!std::is_pointer_v<std::decay_t<Pred>>)
int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,
                                 Pred const& p, F&& f) noexcept
{
    auto num_src = src.numParticles();

    Gpu::DeviceVector<int> keep(num_src);
    auto* keep_ptr = keep.dataPtr();

    const auto src_ptd = src.getConstParticleTileData();

    amrex::ParallelForRNG(num_src,
    [p, keep_ptr, src_ptd] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, keep_ptr, src_ptd, engine);
        // Pick the predicate signature at compile time.
        if constexpr (!IsCallable<Pred,decltype(src_ptd),int,RandomEngine>::value) {
            keep_ptr[i] = p(src_ptd, i);
        } else {
            keep_ptr[i] = p(src_ptd, i, engine);
        }
    });

    return filterAndTransformParticles(dst1, dst2, src, keep_ptr, std::forward<F>(f));
}


/**
 * \brief Conditionally transform particles from src into dst based on a
 *        predicate, with explicit start offsets.
 *
 * Evaluates p for src.numParticles() candidates (indices offset by src_start)
 * into a temporary mask using amrex::ParallelForRNG, then forwards to the
 * mask-based overload. The predicate is called as p(src_data, index, engine)
 * when it is callable with a RandomEngine, and as p(src_data, index) otherwise.
 *
 * \param dst       destination tile
 * \param src       source tile
 * \param p         predicate deciding which particles are processed
 * \param f         callable performing the per-particle transformation
 * \param src_start offset added to the source index
 * \param dst_start offset added to the destination index
 * \return the value returned by the mask-based overload
 */
template <typename DstTile, typename SrcTile, typename Pred, typename F, typename Index>
requires (!std::is_pointer_v<std::decay_t<Pred>>)
Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred const& p, F&& f,
                                   Index src_start, Index dst_start) noexcept
{
    auto num_src = src.numParticles();

    Gpu::DeviceVector<Index> keep(num_src);
    auto* keep_ptr = keep.dataPtr();

    const auto src_ptd = src.getConstParticleTileData();

    amrex::ParallelForRNG(num_src,
    [p, keep_ptr, src_ptd, src_start] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
    {
        amrex::ignore_unused(p, keep_ptr, src_ptd, src_start, engine);
        // Pick the predicate signature at compile time.
        if constexpr (!IsCallable<Pred,decltype(src_ptd),Index,RandomEngine>::value) {
            keep_ptr[i] = p(src_ptd, src_start+i);
        } else {
            keep_ptr[i] = p(src_ptd, src_start+i, engine);
        }
    });

    return filterAndTransformParticles(dst, src, keep_ptr, std::forward<F>(f), src_start, dst_start);
}


template <typename PTile, typename N, std::integral Index>


void gatherParticles (PTile& dst, const PTile& src, N np, const Index* inds)

{

    const auto src_data = src.getConstParticleTileData();

          auto dst_data = dst.getParticleTileData();


    AMREX_HOST_DEVICE_FOR_1D( np, i,

    {

        copyParticle(dst_data, src_data, inds[i], i);

    });


    Gpu::streamSynchronize();

}


template <typename PTile, typename N, std::integral Index>


void scatterParticles (PTile& dst, const PTile& src, N np, const Index* inds)

{

    const auto src_data = src.getConstParticleTileData();

          auto dst_data = dst.getParticleTileData();


    AMREX_HOST_DEVICE_FOR_1D( np, i,

    {

        copyParticle(dst_data, src_data, i, inds[i]);

    });


    Gpu::streamSynchronize();

}


}


#endif // include guard

AMREX_ASSERT
#define AMREX_ASSERT(EX)
Definition AMReX_BLassert.H:38

AMReX_Box.H
Integer-lattice boxes and helpers for defining index-space regions.

AMREX_FORCE_INLINE
#define AMREX_FORCE_INLINE
Definition AMReX_Extension.H:124

AMREX_HOST_DEVICE_FOR_1D
#define AMREX_HOST_DEVICE_FOR_1D(...)
Definition AMReX_GpuLaunchMacrosC.nolint.H:105

AMREX_GPU_DEVICE
#define AMREX_GPU_DEVICE
Definition AMReX_GpuQualifiers.H:18

AMREX_GPU_HOST_DEVICE
#define AMREX_GPU_HOST_DEVICE
Definition AMReX_GpuQualifiers.H:20

AMReX_Gpu.H

AMReX_IntVect.H

mask
Array4< int const  > mask
Definition AMReX_InterpFaceRegister.cpp:93

AMReX_ParticleTileRT.H

AMReX_ParticleTile.H

AMReX_ParticleUtil.H

AMReX_Print.H

amrex::PODVector
Dynamically allocated vector for trivially copyable data.
Definition AMReX_PODVector.H:308

amrex::PODVector::begin
iterator begin() noexcept
Definition AMReX_PODVector.H:674

amrex::PODVector::dataPtr
T * dataPtr() noexcept
Definition AMReX_PODVector.H:670

amrex::PODVector::data
T * data() noexcept
Definition AMReX_PODVector.H:666

amrex::Gpu::exclusive_scan
OutIter exclusive_scan(InIter begin, InIter end, OutIter result)
Definition AMReX_Scan.H:1176

amrex::Swap
__host__ __device__ void Swap(T &t1, T &t2) noexcept
Definition AMReX_Algorithm.H:108

amrex::Gpu::copyAsync
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept
A host-to-device copy routine. Note this is just a wrapper around memcpy, so it assumes contiguous storage.
Definition AMReX_GpuContainers.H:228

amrex::Gpu::deviceToHost
static constexpr DeviceToHost deviceToHost
Definition AMReX_GpuContainers.H:106

amrex::Gpu::streamSynchronize
void streamSynchronize() noexcept
Definition AMReX_GpuDevice.H:310

amrex
Definition AMReX_Amr.cpp:50

amrex::gatherParticles
void gatherParticles(PTile &dst, const PTile &src, N np, const Index *inds)
Gather particles copies particles into contiguous order from an arbitrary order. Specifically, the particle at index inds[i] in src is copied to index i in dst.
Definition AMReX_ParticleTransformation.H:730

amrex::ignore_unused
__host__ __device__ void ignore_unused(const Ts &...)
No-op helper that marks variables as intentionally unused.
Definition AMReX.H:259

amrex::swapParticle
__host__ __device__ void swapParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle swapping routine that can run on the GPU.
Definition AMReX_ParticleTransformation.H:120

amrex::Order::F
@ F

amrex::copyParticle
__host__ __device__ void copyParticle(const ParticleTileData< T_ParticleType, NAR, NAI > &dst, const ConstParticleTileData< T_ParticleType, NAR, NAI > &src, int src_i, int dst_i) noexcept
A general single particle copying routine that can run on the GPU.
Definition AMReX_ParticleTransformation.H:32

amrex::copyParticles
void copyParticles(DstTile &dst, const SrcTile &src) noexcept
Copy particles from src to dst. This version copies all the particles, writing them to the beginning of dst.
Definition AMReX_ParticleTransformation.H:222

amrex::filterAndTransformParticles
Index filterAndTransformParticles(DstTile &dst, const SrcTile &src, Index *mask, F const &f, Index src_start, Index dst_start) noexcept
Conditionally copy particles from src to dst based on the value of mask. A transformation will also be applied to each copied particle.
Definition AMReX_ParticleTransformation.H:513

amrex::filterParticles
Index filterParticles(DstTile &dst, const SrcTile &src, const Index *mask) noexcept
Conditionally copy particles from src to dst based on the value of mask.
Definition AMReX_ParticleTransformation.H:389

amrex::ParallelForRNG
AMREX_ATTRIBUTE_FLATTEN_FOR void ParallelForRNG(T n, L const &f) noexcept
Definition AMReX_GpuLaunchFunctsC.H:1151

amrex::scatterParticles
void scatterParticles(PTile &dst, const PTile &src, N np, const Index *inds)
Scatter particles copies particles from contiguous order into an arbitrary order. Specifically, the particle at index i in src is copied to index inds[i] in dst.
Definition AMReX_ParticleTransformation.H:759

amrex::transformParticles
void transformParticles(DstTile &dst, const SrcTile &src, F &&f) noexcept
Apply the function f to all the particles in src, writing the result to dst. This version does all the particles in src, writing the results starting at the beginning of dst.
Definition AMReX_ParticleTransformation.H:273

amrex::ConstParticleTileData
Definition AMReX_ParticleTile.H:517

amrex::IsCallable
Test if a given type T is callable with arguments of type Args...
Definition AMReX_TypeTraits.H:208

amrex::ParticleTileDataRT
Definition AMReX_ParticleTileRT.H:72

amrex::ParticleTileDataRT::size_type
Long size_type
Definition AMReX_ParticleTileRT.H:74

amrex::ParticleTileData
Definition AMReX_ParticleTile.H:34

amrex::RandomEngine
Definition AMReX_RandomEngine.H:72