Calculates the distribution of FABs to MPI processes. More...
#include <AMReX_DistributionMapping.H>
Classes | |
struct | LIpairGT |
struct | LIpairLT |
struct | Ref |
struct | RefID |
Public Types | |
enum | Strategy { UNDEFINED = -1 , ROUNDROBIN , KNAPSACK , SFC , RRSFC } |
The distribution strategies. More... | |
Public Member Functions | |
DistributionMapping () noexcept | |
The default constructor. More... | |
DistributionMapping (const DistributionMapping &rhs)=default | |
The copy constructor. More... | |
DistributionMapping & | operator= (const DistributionMapping &rhs)=default |
The copy assignment operator. More... | |
DistributionMapping (DistributionMapping &&rhs) noexcept=default | |
The move constructor. More... | |
DistributionMapping & | operator= (DistributionMapping &&rhs) noexcept=default |
The move assignment operator. More... | |
~DistributionMapping () noexcept=default | |
The destructor. More... | |
DistributionMapping (const Vector< int > &pmap) | |
Create an object with the specified mapping. More... | |
DistributionMapping (Vector< int > &&pmap) noexcept | |
DistributionMapping (const BoxArray &boxes, int nprocs=ParallelDescriptor::NProcs()) | |
Build mapping out of BoxArray over nprocs processors. More... | |
DistributionMapping (const DistributionMapping &d1, const DistributionMapping &d2) | |
This is a very specialized distribution map. Do NOT use it unless you really understand what it does. More... | |
void | define (const BoxArray &boxes, int nprocs=ParallelDescriptor::NProcs()) |
Build mapping out of BoxArray over nprocs processors. You need to call this if you built your DistributionMapping with the default constructor. More... | |
void | define (const Vector< int > &pmap) |
Build mapping out of an Array of ints. You need to call this if you built your DistributionMapping with the default constructor. More... | |
void | define (Vector< int > &&pmap) noexcept |
const Vector< int > & | ProcessorMap () const noexcept |
Returns a constant reference to the mapping of boxes in the underlying BoxArray to the CPU that holds the FAB on that Box. ProcessorMap()[i] is an integer in the interval [0, NCPU) where NCPU is the number of CPUs being used. More... | |
Long | size () const noexcept |
Length of the underlying processor map. More... | |
Long | capacity () const noexcept |
bool | empty () const noexcept |
Long | linkCount () const noexcept |
Number of references to this DistributionMapping. More... | |
int | operator[] (int index) const noexcept |
Equivalent to ProcessorMap()[index]. More... | |
std::istream & | readFrom (std::istream &is) |
std::ostream & | writeOn (std::ostream &os) const |
bool | operator== (const DistributionMapping &rhs) const noexcept |
Are the distributions equal? More... | |
bool | operator!= (const DistributionMapping &rhs) const noexcept |
Are the distributions different? More... | |
void | SFCProcessorMap (const BoxArray &boxes, const std::vector< Long > &wgts, int nprocs, bool sort=true) |
void | SFCProcessorMap (const BoxArray &boxes, const std::vector< Long > &wgts, int nprocs, Real &efficiency, bool sort=true) |
void | KnapSackProcessorMap (const std::vector< Long > &wgts, int nprocs, Real *efficiency=nullptr, bool do_full_knapsack=true, int nmax=std::numeric_limits< int >::max(), bool sort=true) |
void | KnapSackProcessorMap (const DistributionMapping &olddm, const std::vector< Long > &wgts, Real keep_ratio, Real &old_efficiency, Real &new_efficiency, int nmax=std::numeric_limits< int >::max()) |
void | RoundRobinProcessorMap (int nboxes, int nprocs, bool sort=true) |
void | RoundRobinProcessorMap (const std::vector< Long > &wgts, int nprocs, bool sort=true) |
RefID | getRefID () const noexcept |
This gives a unique ID of the reference, which is different from dmID above. More... | |
Static Public Member Functions | |
static void | strategy (Strategy how) |
Set/get the distribution strategy. More... | |
static Strategy | strategy () |
static void | SFC_Threshold (int n) |
Set/get the space filling curve threshold. More... | |
static int | SFC_Threshold () |
static void | Initialize () |
Initializes distribution strategy from ParmParse. More... | |
static void | Finalize () |
static bool | SameRefs (const DistributionMapping &lhs, const DistributionMapping &rhs) |
static DistributionMapping | makeKnapSack (const MultiFab &weight, int nmax=std::numeric_limits< int >::max()) |
static DistributionMapping | makeKnapSack (const MultiFab &weight, Real &eff, int nmax=std::numeric_limits< int >::max()) |
static DistributionMapping | makeKnapSack (const Vector< Real > &rcost, int nmax=std::numeric_limits< int >::max()) |
static DistributionMapping | makeKnapSack (const Vector< Real > &rcost, Real &eff, int nmax=std::numeric_limits< int >::max(), bool sort=true) |
static DistributionMapping | makeKnapSack (const LayoutData< Real > &rcost_local, Real &currentEfficiency, Real &proposedEfficiency, int nmax=std::numeric_limits< int >::max(), bool broadcastToAll=true, int root=ParallelDescriptor::IOProcessorNumber(), Real keep_ratio=Real(0.0)) |
Computes a new distribution mapping by distributing input costs according to the knapsack algorithm. More... | |
static DistributionMapping | makeRoundRobin (const MultiFab &weight) |
static DistributionMapping | makeSFC (const MultiFab &weight, bool sort=true) |
static DistributionMapping | makeSFC (const MultiFab &weight, Real &eff, bool sort=true) |
static DistributionMapping | makeSFC (const Vector< Real > &rcost, const BoxArray &ba, bool sort=true) |
static DistributionMapping | makeSFC (const Vector< Real > &rcost, const BoxArray &ba, Real &eff, bool sort=true) |
static DistributionMapping | makeSFC (const LayoutData< Real > &rcost_local, Real &currentEfficiency, Real &proposedEfficiency, bool broadcastToAll=true, int root=ParallelDescriptor::IOProcessorNumber()) |
Computes a new distribution mapping by distributing input costs according to a space filling curve (SFC) algorithm. More... | |
static std::vector< std::vector< int > > | makeSFC (const BoxArray &ba, bool use_box_vol=true, int nprocs=ParallelContext::NProcsSub()) |
template<typename T > | |
static void | ComputeDistributionMappingEfficiency (const DistributionMapping &dm, const std::vector< T > &cost, Real *efficiency) |
Computes the average cost per MPI rank given a distribution mapping and a global cost vector. More... | |
Private Types | |
using | LIpair = std::pair< Long, int > |
using | PVMF = void(DistributionMapping::*)(const BoxArray &, int) |
A useful typedef. More... | |
Private Member Functions | |
const Vector< int > & | getIndexArray () |
const std::vector< bool > & | getOwnerShip () |
void | RoundRobinProcessorMap (const BoxArray &boxes, int nprocs) |
Ways to create the processor map. More... | |
void | KnapSackProcessorMap (const BoxArray &boxes, int nprocs) |
void | SFCProcessorMap (const BoxArray &boxes, int nprocs) |
void | RRSFCProcessorMap (const BoxArray &boxes, int nprocs) |
void | RoundRobinDoIt (int nboxes, int nprocs, std::vector< LIpair > *LIpairV=nullptr, bool sort=true) |
void | KnapSackDoIt (const std::vector< Long > &wgts, int nprocs, Real &efficiency, bool do_full_knapsack, int nmax=std::numeric_limits< int >::max(), bool sort=true) |
void | SFCProcessorMapDoIt (const BoxArray &boxes, const std::vector< Long > &wgts, int nprocs, bool sort=true, Real *efficiency=nullptr) |
void | RRSFCDoIt (const BoxArray &boxes, int nprocs) |
Static Private Member Functions | |
static void | Sort (std::vector< LIpair > &vec, bool reverse) |
static void | LeastUsedCPUs (int nprocs, Vector< int > &result) |
Least used ordering of CPUs (by # of bytes of FAB data). More... | |
static void | LeastUsedTeams (Vector< int > &rteam, Vector< Vector< int > > &rworker, int nteams, int nworkers) |
rteam: Least used ordering of Teams; rworker[i]: Least used ordering of team workers for Team i. More... | |
Private Attributes | |
std::shared_ptr< Ref > | m_ref |
The data – a reference-counted pointer to a Ref. More... | |
Static Private Attributes | |
static Strategy | m_Strategy = DistributionMapping::SFC |
Everyone uses the same Strategy – defaults to SFC. More... | |
static PVMF | m_BuildMap = nullptr |
Pointer to one of the CreateProcessorMap() functions. Corresponds to the one specified by m_Strategy. More... | |
Friends | |
template<typename T > | |
class | FabArray |
class | FabArrayBase |
Calculates the distribution of FABs to MPI processes.
This class calculates the distribution of FABs to MPI processes in a FabArray in a multi-processor environment. By distribution is meant which MPI process in the multi-processor environment owns which FAB. Only the BoxArray on which the FabArray is built is used in determining the distribution. The three types of distributions supported are round-robin, knapsack, and SFC. In the round-robin distribution FAB i is owned by CPU i % N, where N is the total number of CPUs. In the knapsack distribution the FABs are partitioned across CPUs such that the total volume of the Boxes in the underlying BoxArray is as equal across CPUs as is possible. The SFC distribution is based on a space filling curve.
|
private |
|
private |
A useful typedef.
|
noexcept |
The default constructor.
|
default |
The copy constructor.
|
defaultnoexcept |
The move constructor.
|
defaultnoexcept |
The destructor.
Create an object with the specified mapping.
|
explicit |
Build mapping out of BoxArray over nprocs processors.
amrex::DistributionMapping::DistributionMapping | ( | const DistributionMapping & | d1, |
const DistributionMapping & | d2 | ||
) |
This is a very specialized distribution map. Do NOT use it unless you really understand what it does.
|
inlinenoexcept |
|
static |
Computes the average cost per MPI rank given a distribution mapping and a global cost vector.
[in] | dm | distribution mapping (mapping from FAB to MPI processes) |
[in] | cost | vector giving mapping from FAB to the corresponding cost |
[in,out] | efficiency | average cost per MPI process, as computed from the given distribution mapping and cost |
void amrex::DistributionMapping::define | ( | const BoxArray & | boxes, |
int | nprocs = ParallelDescriptor::NProcs() |
||
) |
Build mapping out of BoxArray over nprocs processors. You need to call this if you built your DistributionMapping with the default constructor.
Build mapping out of an Array of ints. You need to call this if you built your DistributionMapping with the default constructor.
|
inlinenoexcept |
|
static |
|
private |
|
inlinenoexcept |
This gives a unique ID of the reference, which is different from dmID above.
|
static |
Initializes distribution strategy from ParmParse.
ParmParse options are:
DistributionMapping.strategy = ROUNDROBIN
DistributionMapping.strategy = KNAPSACK
DistributionMapping.strategy = SFC
DistributionMapping.strategy = RRSFC
|
private |
|
private |
void amrex::DistributionMapping::KnapSackProcessorMap | ( | const DistributionMapping & | olddm, |
const std::vector< Long > & | wgts, | ||
Real | keep_ratio, | ||
Real & | old_efficiency, | ||
Real & | new_efficiency, | ||
int | nmax = std::numeric_limits<int>::max() |
||
) |
void amrex::DistributionMapping::KnapSackProcessorMap | ( | const std::vector< Long > & | wgts, |
int | nprocs, | ||
Real * | efficiency = nullptr , |
||
bool | do_full_knapsack = true , |
||
int | nmax = std::numeric_limits<int>::max() , |
||
bool | sort = true |
||
) |
Least used ordering of CPUs (by # of bytes of FAB data).
|
staticprivate |
rteam: Least used ordering of Teams; rworker[i]: Least used ordering of team workers for Team i.
|
inlinenoexcept |
Number of references to this DistributionMapping.
|
static |
Computes a new distribution mapping by distributing input costs according to the knapsack
algorithm.
[in] | rcost_local | LayoutData of costs; contains, e.g., costs for the local boxes in the FAB array, the corresponding global indices in the FAB array, and the distribution mapping |
[in,out] | currentEfficiency | writes the efficiency (i.e., mean cost over all MPI ranks, normalized to the max cost) given the current distribution mapping |
[in,out] | proposedEfficiency | writes the efficiency for the proposed distribution mapping |
[in] | nmax | the maximum number of boxes that can be assigned to any MPI rank by the knapsack algorithm |
[in] | broadcastToAll | controls whether to transmit the proposed distribution mapping to all other processes; setting this to false allows one to, e.g., test whether the proposed distribution mapping is an improvement relative to the current distribution mapping, before deciding to broadcast the proposed distribution mapping |
[in] | root | the process that collects the local costs from the others and computes the proposed distribution mapping |
[in] | keep_ratio | controls the fraction of load that should be kept on the original process. |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
If use_box_vol is true, weight boxes by their volume in Distribute; otherwise, all boxes will be treated with equal weight.
|
static |
Computes a new distribution mapping by distributing input costs according to a space filling curve
(SFC) algorithm.
[in] | rcost_local | LayoutData of costs; contains, e.g., costs for the local boxes in the FAB array, the corresponding global indices in the FAB array, and the distribution mapping |
[in,out] | currentEfficiency | writes the efficiency (i.e., mean cost over all MPI ranks, normalized to the max cost) given the current distribution mapping |
[in,out] | proposedEfficiency | writes the efficiency for the proposed distribution mapping |
[in] | broadcastToAll | controls whether to transmit the proposed distribution mapping to all other processes; setting this to false allows one to, e.g., test whether the proposed distribution mapping is an improvement relative to the current distribution mapping, before deciding to broadcast the proposed distribution mapping |
[in] | root | the process that collects the local costs from the others and computes the proposed distribution mapping |
|
static |
|
static |
|
static |
|
static |
|
noexcept |
Are the distributions different?
|
default |
The copy assignment operator.
|
defaultnoexcept |
The move assignment operator.
|
noexcept |
Are the distributions equal?
Equivalent to ProcessorMap()[index].
Returns a constant reference to the mapping of boxes in the underlying BoxArray to the CPU that holds the FAB on that Box. ProcessorMap()[i] is an integer in the interval [0, NCPU) where NCPU is the number of CPUs being used.
std::istream & amrex::DistributionMapping::readFrom | ( | std::istream & | is | ) |
|
private |
|
private |
Ways to create the processor map.
void amrex::DistributionMapping::RoundRobinProcessorMap | ( | const std::vector< Long > & | wgts, |
int | nprocs, | ||
bool | sort = true |
||
) |
void amrex::DistributionMapping::RoundRobinProcessorMap | ( | int | nboxes, |
int | nprocs, | ||
bool | sort = true |
||
) |
|
inlinestatic |
|
static |
|
static |
Set/get the space filling curve threshold.
void amrex::DistributionMapping::SFCProcessorMap | ( | const BoxArray & | boxes, |
const std::vector< Long > & | wgts, | ||
int | nprocs, | ||
bool | sort = true |
||
) |
void amrex::DistributionMapping::SFCProcessorMap | ( | const BoxArray & | boxes, |
const std::vector< Long > & | wgts, | ||
int | nprocs, | ||
Real & | efficiency, | ||
bool | sort = true |
||
) |
|
private |
|
inlinenoexcept |
Length of the underlying processor map.
|
staticprivate |
|
static |
|
static |
Set/get the distribution strategy.
std::ostream & amrex::DistributionMapping::writeOn | ( | std::ostream & | os | ) | const |
|
friend |
|
friend |
|
staticprivate |
Pointer to one of the CreateProcessorMap() functions. Corresponds to the one specified by m_Strategy.
|
private |
The data – a reference-counted pointer to a Ref.
|
staticprivate |
Everyone uses the same Strategy – defaults to SFC.