2356 if (
rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2357 if (isLocallyIndexed ()) {
2363 else if (isGloballyIndexed ()) {
2364 auto gblInds = getGlobalIndsViewHost(rowinfo);
2365 for (
size_t j = 0; j < theNumEntries; ++j) {
2366 indices[j] = colMap_->getLocalElement (gblInds(j));
2372#ifdef TPETRA_ENABLE_DEPRECATED_CODE
2373 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2377 const Teuchos::ArrayView<LocalOrdinal>&indices,
2378 size_t& numEntries)
const
2380 using Teuchos::ArrayView;
2381 const char tfecfFuncName[] =
"getLocalRowCopy: ";
2383 TEUCHOS_TEST_FOR_EXCEPTION(
2384 isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2385 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2386 "does not have a column Map yet. That means we don't have local indices "
2387 "for columns yet, so it doesn't make sense to call this method. If the "
2388 "graph doesn't have a column Map yet, you should call fillComplete on "
2393 const RowInfo rowinfo = this->getRowInfo (localRow);
2395 const size_t theNumEntries = rowinfo.numEntries;
2396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2397 (
static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2398 "Specified storage (size==" << indices.size () <<
") does not suffice "
2399 "to hold all " << theNumEntries <<
" entry/ies for this row.");
2400 numEntries = theNumEntries;
2402 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2403 if (isLocallyIndexed ()) {
2404 auto lclInds = getLocalIndsViewHost(rowinfo);
2405 for (
size_t j = 0; j < theNumEntries; ++j) {
2406 indices[j] = lclInds(j);
2409 else if (isGloballyIndexed ()) {
2410 auto gblInds = getGlobalIndsViewHost(rowinfo);
2411 for (
size_t j = 0; j < theNumEntries; ++j) {
2412 indices[j] = colMap_->getLocalElement (gblInds(j));
2420 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2424 nonconst_global_inds_host_view_type &indices,
2425 size_t& numEntries)
const
2427 using Teuchos::ArrayView;
2435 static_cast<size_t> (indices.size ()) <
theNumEntries, std::runtime_error,
2436 "Specified storage (size==" << indices.size () <<
") does not suffice "
2437 "to hold all " <<
theNumEntries <<
" entry/ies for this row.");
2440 if (
rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2441 if (isLocallyIndexed ()) {
2444 indices[
j] = colMap_->getGlobalElement (
lclInds(
j));
2447 else if (isGloballyIndexed ()) {
2457#ifdef TPETRA_ENABLE_DEPRECATED_CODE
2458 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2462 const Teuchos::ArrayView<GlobalOrdinal>& indices,
2463 size_t& numEntries)
const
2465 using Teuchos::ArrayView;
2473 static_cast<size_t> (indices.size ()) <
theNumEntries, std::runtime_error,
2474 "Specified storage (size==" << indices.size () <<
") does not suffice "
2475 "to hold all " <<
theNumEntries <<
" entry/ies for this row.");
2478 if (
rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2479 if (isLocallyIndexed ()) {
2482 indices[
j] = colMap_->getGlobalElement (
lclInds(
j));
2485 else if (isGloballyIndexed ()) {
2486 auto gblInds = getGlobalIndsViewHost(rowinfo);
2487 for (
size_t j = 0; j < theNumEntries; ++j) {
2488 indices[j] = gblInds(j);
2496 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2501 local_inds_host_view_type &indices)
const
2506 (isGloballyIndexed (), std::runtime_error,
"The graph's indices are "
2507 "currently stored as global indices, so we cannot return a view with "
2508 "local column indices, whether or not the graph has a column Map. If "
2509 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2512 if (
rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2514 indices = lclIndsUnpacked_wdv.getHostSubview(
rowInfo.offset1D,
2521 indices = local_inds_host_view_type();
2526 (
static_cast<size_t> (indices.size ()) !=
2527 getNumEntriesInLocalRow (localRow), std::logic_error,
"indices.size() "
2528 "= " << indices.extent(0) <<
" != getNumEntriesInLocalRow(localRow=" <<
2529 localRow <<
") = " << getNumEntriesInLocalRow(localRow) <<
2530 ". Please report this bug to the Tpetra developers.");
2535 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2540 global_inds_host_view_type &indices)
const
2545 (isLocallyIndexed (), std::runtime_error,
"The graph's indices are "
2546 "currently stored as local indices, so we cannot return a view with "
2547 "global column indices. Use getGlobalRowCopy() instead.");
2552 if (
rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2554 indices = gblInds_wdv.getHostSubview(
rowInfo.offset1D,
2559 indices =
typename global_inds_dualv_type::t_host::const_type();
2563 (
static_cast<size_t> (indices.size ()) !=
2565 std::logic_error,
"indices.size() = " << indices.extent(0)
2566 <<
" != getNumEntriesInGlobalRow(globalRow=" <<
globalRow <<
") = "
2568 <<
". Please report this bug to the Tpetra developers.");
2572#ifdef TPETRA_ENABLE_DEPRECATED_CODE
2573 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2578 Teuchos::ArrayView<const LocalOrdinal>& indices)
const
2583 (isGloballyIndexed (), std::runtime_error,
"The graph's indices are "
2584 "currently stored as global indices, so we cannot return a view with "
2585 "local column indices, whether or not the graph has a column Map. If "
2586 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2591 indices = Teuchos::null;
2592 if (
rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2594 indices = this->getLocalView (
rowInfo);
2599 indices = indices (0,
rowInfo.numEntries);
2603 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2604 (
static_cast<size_t> (indices.size ()) !=
2605 getNumEntriesInLocalRow (localRow), std::logic_error,
"indices.size() "
2606 "= " << indices.size () <<
" != getNumEntriesInLocalRow(localRow=" <<
2607 localRow <<
") = " << getNumEntriesInLocalRow (localRow) <<
2608 ". Please report this bug to the Tpetra developers.");
2614#ifdef TPETRA_ENABLE_DEPRECATED_CODE
2615 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2620 Teuchos::ArrayView<const GlobalOrdinal>& indices)
const
2622 const char tfecfFuncName[] =
"getGlobalRowView: ";
2624 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2625 (isLocallyIndexed (), std::runtime_error,
"The graph's indices are "
2626 "currently stored as local indices, so we cannot return a view with "
2627 "global column indices. Use getGlobalRowCopy() instead.");
2631 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2632 indices = Teuchos::null;
2633 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2634 rowInfo.numEntries > 0) {
2635 indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2640 (
static_cast<size_t> (indices.size ()) !=
2641 getNumEntriesInGlobalRow (globalRow),
2642 std::logic_error,
"indices.size() = " << indices.size ()
2643 <<
" != getNumEntriesInGlobalRow(globalRow=" << globalRow <<
") = "
2644 << getNumEntriesInGlobalRow (globalRow)
2645 <<
". Please report this bug to the Tpetra developers.");
2651 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2655 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2660 (! isFillActive (), std::runtime_error,
"Fill must be active.");
2662 (isGloballyIndexed (), std::runtime_error,
2663 "Graph indices are global; use insertGlobalIndices().");
2665 (! hasColMap (), std::runtime_error,
2666 "Cannot insert local indices without a column Map.");
2668 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2669 "Local row index " << localRow <<
" is not in the row Map "
2670 "on the calling process.");
2671 if (! indicesAreAllocated ()) {
2672 allocateIndices (LocalIndices, verbose_);
2681 using Teuchos::Array;
2682 using Teuchos::toString;
2684 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2689 for (size_type
k = 0;
k < indices.size (); ++
k) {
2690 if (!
colMap.isNodeLocalElement (indices[
k])) {
2696 std::ostringstream
os;
2697 os <<
"Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2698 "entries in owned row " << localRow <<
", at the following column "
2699 "indices: " << toString (indices) <<
"." <<
endl;
2700 os <<
"Of those, the following indices are not in the column Map on "
2701 "this process: " << toString (
badColInds) <<
"." <<
endl <<
"Since "
2702 "the graph has a column Map already, it is invalid to insert entries "
2703 "at those locations.";
2709 insertLocalIndicesImpl (localRow, indices);
2713 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2714 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2715 "! isLocallyIndexed() is true. Please report this bug to the "
2716 "Tpetra developers.");
2720 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2728 this->insertLocalIndices (localRow,
indsT);
2732 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2743 (this->isLocallyIndexed (), std::runtime_error,
2744 "graph indices are local; use insertLocalIndices().");
2750 (! this->isFillActive (), std::runtime_error,
2751 "You are not allowed to call this method if fill is not active. "
2752 "If fillComplete has been called, you must first call resumeFill "
2753 "before you may insert indices.");
2754 if (! indicesAreAllocated ()) {
2755 allocateIndices (GlobalIndices, verbose_);
2757 const LO
lclRow = this->rowMap_->getLocalElement (
gblRow);
2758 if (
lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2760 if (this->hasColMap ()) {
2776 std::ostringstream
os;
2777 os <<
"You attempted to insert entries in owned row " <<
gblRow
2778 <<
", at the following column indices: [";
2785 os <<
"]." <<
endl <<
"Of those, the following indices are not in "
2786 "the column Map on this process: [";
2793 os <<
"]." <<
endl <<
"Since the matrix has a column Map already, "
2794 "it is invalid to insert entries at those locations.";
2796 (
true, std::invalid_argument,
os.str ());
2809 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2820 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2829 const char tfecfFuncName[] =
"insertGlobalIndicesFiltered: ";
2832 (this->isLocallyIndexed (), std::runtime_error,
2833 "Graph indices are local; use insertLocalIndices().");
2839 (! this->isFillActive (), std::runtime_error,
2840 "You are not allowed to call this method if fill is not active. "
2841 "If fillComplete has been called, you must first call resumeFill "
2842 "before you may insert indices.");
2843 if (! indicesAreAllocated ()) {
2844 allocateIndices (GlobalIndices, verbose_);
2849 if (! colMap_.is_null ()) {
2860 if (
lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2884 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2903 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2910 ! isFillActive (), std::runtime_error,
"requires that fill is active.");
2912 isStorageOptimized (), std::runtime_error,
2913 "cannot remove indices after optimizeStorage() has been called.");
2915 isGloballyIndexed (), std::runtime_error,
"graph indices are global.");
2917 ! rowMap_->isNodeLocalElement (
lrow), std::runtime_error,
2918 "Local row " <<
lrow <<
" is not in the row Map on the calling process.");
2919 if (! indicesAreAllocated ()) {
2920 allocateIndices (LocalIndices, verbose_);
2925 clearGlobalConstants ();
2927 if (k_numRowEntries_.extent (0) != 0) {
2928 this->k_numRowEntries_(
lrow) = 0;
2933 (getNumEntriesInLocalRow (
lrow) != 0 ||
2934 ! indicesAreAllocated () ||
2935 ! isLocallyIndexed (), std::logic_error,
2936 "Violated stated post-conditions. Please contact Tpetra team.");
2941 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2945 const typename local_graph_device_type::entries_type::non_const_type&
columnIndices)
2949 ! hasColMap () || getColMap ().
is_null (), std::runtime_error,
2950 "The graph must have a column Map before you may call this method.");
2957 std::runtime_error,
"Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2962 std::runtime_error,
"rowPointers.size() = " <<
rowPtrLen <<
2963 " != this->getNodeNumRows()+1 = " << (
numLocalRows + 1) <<
".");
2967 if (debug_ && this->isSorted()) {
2970 using exec_space =
typename local_graph_device_type::execution_space;
2971 using size_type =
typename local_graph_device_type::size_type;
2972 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0,
numLocalRows),
2987 auto comm = this->getComm();
2988 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX,
notSorted,
2996 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2999 throw std::invalid_argument(
"CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
3007 ((this->lclIndsUnpacked_wdv.extent (0) != 0 ||
this->gblInds_wdv.extent (0) != 0),
3008 std::runtime_error,
"You may not call this method if 1-D data "
3009 "structures are already allocated.");
3011 indicesAreAllocated_ =
true;
3012 indicesAreLocal_ =
true;
3013 indicesAreSorted_ =
true;
3014 noRedundancies_ =
true;
3016 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3020 set_need_sync_host_uvm_access();
3024 storageStatus_ = Details::STORAGE_1D_PACKED;
3029 numAllocForAllRows_ = 0;
3030 k_numAllocPerRow_ =
decltype (k_numAllocPerRow_) ();
3032 checkInternalState ();
3036 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3043 typedef typename local_graph_device_type::row_map_type
row_map_type;
3044 typedef typename row_map_type::array_layout
layout_type;
3047 Kokkos::MemoryUnmanaged> input_view_type;
3051 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3061 Kokkos::deep_copy (Kokkos::Impl::if_c<
same,
3069 std::is_same<
typename row_map_type::memory_space,
3070 Kokkos::HostSpace>::value;
3091 Kokkos::View<LocalOrdinal*, layout_type, device_type>
k_ind =
3092 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (
columnIndices ());
3097 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3105 const char tfecfFuncName[] =
"getNumEntriesPerLocalRowUpperBound: ";
3106 const char suffix[] =
" Please report this bug to the Tpetra developers.";
3117 if (this->indicesAreAllocated ()) {
3118 if (this->isStorageOptimized ()) {
3122 (
numRows != 0 && rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3123 "The graph has " <<
numRows <<
" (> 0) row"
3124 << (
numRows != 1 ?
"s" :
"") <<
" on the calling process, "
3125 "but the k_rowPtrs_ array has zero entries." <<
suffix);
3126 Teuchos::ArrayRCP<size_t>
numEnt;
3135 numEnt[
i] = rowPtrsUnpacked_host_(
i+1) - rowPtrsUnpacked_host_(
i);
3153 else if (k_numRowEntries_.extent (0) != 0) {
3167 if (k_numAllocPerRow_.extent (0) != 0) {
3183 "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3188 "numEntriesForAll and allRowsSame are not consistent. The former "
3193 "numEntriesPerRow and allRowsSame are not consistent. The former has "
3203 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3208 using Teuchos::Comm;
3209 using Teuchos::outArg;
3212 using Teuchos::REDUCE_MAX;
3213 using Teuchos::REDUCE_MIN;
3214 using Teuchos::reduceAll;
3219 using size_type =
typename Teuchos::Array<GO>::size_type;
3222 std::unique_ptr<std::string>
prefix;
3224 prefix = this->createPrefix(
"CrsGraph",
"globalAssemble");
3225 std::ostringstream
os;
3227 std::cerr <<
os.str();
3232 (! isFillActive (), std::runtime_error,
"Fill must be active before "
3233 "you may call this method.");
3248 std::ostringstream
os;
3250 std::cerr <<
os.str();
3254 else if (verbose_) {
3255 std::ostringstream
os;
3256 os << *
prefix <<
"At least 1 process has nonlocal rows"
3258 std::cerr <<
os.str();
3277 for (
auto mapIter = this->nonlocals_.begin ();
3278 mapIter != this->nonlocals_.end ();
3308 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3313 std::ostringstream
os;
3314 os << *
prefix <<
"nonlocalRowMap->getIndexBase()="
3316 std::cerr <<
os.str();
3329 for (
auto mapIter = this->nonlocals_.begin ();
3330 mapIter != this->nonlocals_.end ();
3339 std::ostringstream
os;
3341 std::cerr <<
os.str();
3357 std::ostringstream
os;
3359 std::cerr <<
os.str();
3367 std::ostringstream
os;
3368 os << *
prefix <<
"Original row Map is NOT 1-to-1" <<
endl;
3369 std::cerr <<
os.str();
3386 std::ostringstream
os;
3388 std::cerr <<
os.str();
3399 std::ostringstream
os;
3401 std::cerr <<
os.str();
3413 checkInternalState ();
3415 std::ostringstream
os;
3417 std::cerr <<
os.str();
3422 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3427 clearGlobalConstants();
3428 if (
params != Teuchos::null) this->setParameterList (
params);
3430 indicesAreSorted_ =
true;
3431 noRedundancies_ =
true;
3432 fillComplete_ =
false;
3436 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3451 Teuchos::RCP<const map_type>
domMap = this->getDomainMap ();
3453 domMap = this->getRowMap ();
3455 Teuchos::RCP<const map_type>
ranMap = this->getRangeMap ();
3457 ranMap = this->getRowMap ();
3463 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3466 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
3467 const Teuchos::RCP<const map_type>&
rangeMap,
3468 const Teuchos::RCP<Teuchos::ParameterList>&
params)
3472 const bool verbose = verbose_;
3474 std::unique_ptr<std::string>
prefix;
3476 prefix = this->createPrefix(
"CrsGraph",
"fillComplete");
3477 std::ostringstream
os;
3479 std::cerr <<
os.str();
3483 (! isFillActive () || isFillComplete (), std::runtime_error,
3484 "Graph fill state must be active (isFillActive() "
3485 "must be true) before calling fillComplete().");
3487 const int numProcs = getComm ()->getSize ();
3495 if (!
params.is_null ()) {
3496 if (
params->isParameter (
"sort column map ghost gids")) {
3497 sortGhostsAssociatedWithEachProcessor_ =
3498 params->get<
bool> (
"sort column map ghost gids",
3499 sortGhostsAssociatedWithEachProcessor_);
3501 else if (
params->isParameter (
"Sort column Map ghost GIDs")) {
3502 sortGhostsAssociatedWithEachProcessor_ =
3503 params->get<
bool> (
"Sort column Map ghost GIDs",
3504 sortGhostsAssociatedWithEachProcessor_);
3511 if (!
params.is_null ()) {
3519 if (! indicesAreAllocated ()) {
3522 allocateIndices (LocalIndices, verbose);
3525 allocateIndices (GlobalIndices, verbose);
3542 std::ostringstream
os;
3543 os << *
prefix <<
"Do not need to call globalAssemble; "
3544 "assertNoNonlocalInserts="
3548 std::cerr <<
os.str();
3553 std::ostringstream
os;
3555 Details::Impl::verbosePrintMap(
3556 os, nonlocals_.begin(), nonlocals_.end(),
3557 nonlocals_.size(),
"nonlocals_");
3558 std::cerr <<
os.str() <<
endl;
3562 auto map = this->getMap();
3563 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
3565 if (! comm.is_null()) {
3566 using Teuchos::REDUCE_MAX;
3567 using Teuchos::reduceAll;
3574 "least one process in the CrsGraph's communicator. This "
3575 "means either that you incorrectly set the "
3576 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3577 "or that you inserted invalid entries. "
3578 "Rerun with the environment variable TPETRA_VERBOSE="
3579 "CrsGraph set to see the entries of nonlocals_ on every "
3580 "MPI process (WARNING: lots of output).");
3585 "nonlocals_.size()=" <<
numNonlocals <<
" != 0 on the "
3586 "calling process. This means either that you incorrectly "
3587 "set the \"No Nonlocal Changes\" fillComplete parameter "
3588 "to true, or that you inserted invalid entries. "
3589 "Rerun with the environment "
3590 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3591 "of nonlocals_ on every MPI process (WARNING: lots of "
3604 Teuchos::Array<int> remotePIDs (0);
3607 this->makeColMap (remotePIDs);
3613 this->makeIndicesLocal(verbose);
3618 using Teuchos::REDUCE_MIN;
3619 using Teuchos::reduceAll;
3620 using Teuchos::outArg;
3624 if (!
map.is_null ()) {
3625 comm =
map->getComm ();
3627 if (comm.is_null ()) {
3637 std::ostringstream
os;
3640 (
true, std::runtime_error,
os.str ());
3657 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3665 this->fillLocalGraph (
params);
3668 params->get (
"compute global constants",
true);
3670 this->computeGlobalConstants ();
3673 this->computeLocalConstants ();
3675 this->fillComplete_ =
true;
3676 this->checkInternalState ();
3679 std::ostringstream
os;
3681 std::cerr <<
os.str();
3686 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3690 const Teuchos::RCP<const map_type>&
rangeMap,
3691 const Teuchos::RCP<const import_type>&
importer,
3692 const Teuchos::RCP<const export_type>&
exporter,
3693 const Teuchos::RCP<Teuchos::ParameterList>&
params)
3696#ifdef HAVE_TPETRA_MMM_TIMINGS
3700 std::string
prefix = std::string(
"Tpetra ")+
label + std::string(
": ");
3701 using Teuchos::TimeMonitor;
3702 Teuchos::RCP<Teuchos::TimeMonitor>
MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-Setup"))));
3708 std::runtime_error,
"The input domain Map and range Map must be nonnull.");
3710 isFillComplete () || ! hasColMap (), std::runtime_error,
"You may not "
3711 "call this method unless the graph has a column Map.");
3713 getNodeNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3714 std::runtime_error,
"The calling process has getNodeNumRows() = "
3715 << getNodeNumRows () <<
" > 0 rows, but the row offsets array has not "
3718 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getNodeNumRows () + 1,
3719 std::runtime_error,
"The row offsets array has length " <<
3720 rowPtrsUnpacked_host_.extent (0) <<
" != getNodeNumRows()+1 = " <<
3721 (getNodeNumRows () + 1) <<
".");
3736 numAllocForAllRows_ = 0;
3737 k_numAllocPerRow_ =
decltype (k_numAllocPerRow_) ();
3738 indicesAreAllocated_ =
true;
3743 indicesAreLocal_ =
true;
3744 indicesAreGlobal_ =
false;
3747#ifdef HAVE_TPETRA_MMM_TIMINGS
3749 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-Maps"))));
3754 indicesAreSorted_ =
true;
3755 noRedundancies_ =
true;
3758#ifdef HAVE_TPETRA_MMM_TIMINGS
3760 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXcheckI"))));
3763 importer_ = Teuchos::null;
3764 exporter_ = Teuchos::null;
3767 !
importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3768 !
importer->getTargetMap ()->isSameAs (*getColMap ()),
3769 std::invalid_argument,
": importer does not match matrix maps.");
3774#ifdef HAVE_TPETRA_MMM_TIMINGS
3776 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXcheckE"))));
3781 !
exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3782 !
exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3783 std::invalid_argument,
": exporter does not match matrix maps.");
3787#ifdef HAVE_TPETRA_MMM_TIMINGS
3789 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-mIXmake"))));
3791 Teuchos::Array<int> remotePIDs (0);
3792 this->makeImportExport (remotePIDs,
false);
3794#ifdef HAVE_TPETRA_MMM_TIMINGS
3796 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-fLG"))));
3798 this->fillLocalGraph (
params);
3801 params->get (
"compute global constants",
true);
3804#ifdef HAVE_TPETRA_MMM_TIMINGS
3806 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cGC (const)"))));
3808 this->computeGlobalConstants ();
3811#ifdef HAVE_TPETRA_MMM_TIMINGS
3813 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cGC (noconst)"))));
3815 this->computeLocalConstants ();
3818 fillComplete_ =
true;
3820#ifdef HAVE_TPETRA_MMM_TIMINGS
3822 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-G-cIS"))));
3824 checkInternalState ();
3828 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3833 using ::Tpetra::Details::computeOffsetsFromCounts;
3835 typedef typename local_graph_device_type::row_map_type
row_map_type;
3837 typedef typename local_graph_device_type::entries_type::non_const_type
lclinds_1d_type;
3838 const char tfecfFuncName[] =
"fillLocalGraph (called from fillComplete or "
3839 "expertStaticFillComplete): ";
3840 const size_t lclNumRows = this->getNodeNumRows ();
3846 if (!
params.is_null () && !
params->get (
"Optimize Storage",
true)) {
3856 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3857 (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3858 "k_rowPtrs_ has size zero, but shouldn't");
3859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3860 (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3861 "rowPtrsUnpacked_host_.extent(0) = "
3862 << rowPtrsUnpacked_host_.extent (0) <<
" != (lclNumRows + 1) = "
3863 << (lclNumRows + 1) <<
".");
3864 const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3865 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3866 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3868 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3869 std::logic_error,
"numOffsets=" << numOffsets <<
" != 0 "
3870 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3871 <<
" != k_rowPtrs_(" << numOffsets <<
")=" << valToCheck
3875 size_t allocSize = 0;
3877 allocSize = this->getNodeAllocationSize ();
3879 catch (std::logic_error& e) {
3880 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3881 (
true, std::logic_error,
"getNodeAllocationSize threw "
3882 "std::logic_error: " << e.what ());
3884 catch (std::runtime_error& e) {
3885 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3886 (
true, std::runtime_error,
"getNodeAllocationSize threw "
3887 "std::runtime_error: " << e.what ());
3889 catch (std::exception& e) {
3890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3891 (
true, std::runtime_error,
"getNodeAllocationSize threw "
3892 "std::exception: " << e.what ());
3895 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3896 (
true, std::runtime_error,
"getNodeAllocationSize threw "
3897 "an exception not a subclass of std::exception.");
3900 if (this->getNodeNumEntries () != allocSize) {
3903 non_const_row_map_type ptr_d;
3904 row_map_type ptr_d_const;
3913 if (rowPtrsUnpacked_host_.extent (0) != 0) {
3914 const size_t numOffsets =
3915 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3916 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3918 (valToCheck !=
size_t(lclIndsUnpacked_wdv.extent(0)),
3919 std::logic_error,
"(Unpacked branch) Before allocating "
3920 "or packing, k_rowPtrs_(" << (numOffsets-1) <<
")="
3921 << valToCheck <<
" != lclIndsUnpacked_wdv.extent(0)="
3922 << lclIndsUnpacked_wdv.extent (0) <<
".");
3932 size_t lclTotalNumEntries = 0;
3936 non_const_row_map_type (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
3937 ptr_d_const = ptr_d;
3941 typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3943 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3944 (
size_t(numRowEnt_h.extent (0)) != lclNumRows,
3945 std::logic_error,
"(Unpacked branch) "
3946 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3947 <<
" != getNodeNumRows()=" << lclNumRows <<
"");
3953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3954 (
static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3955 std::logic_error,
"(Unpacked branch) After allocating "
3956 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3957 <<
" != lclNumRows+1 = " << (lclNumRows+1) <<
".");
3958 const auto valToCheck =
3959 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3961 (valToCheck != lclTotalNumEntries, std::logic_error,
3962 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3963 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3964 <<
") = " << valToCheck <<
" != total number of entries "
3965 "on the calling process = " << lclTotalNumEntries
3971 lclinds_1d_type ind_d =
3972 lclinds_1d_type (
"Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3984 typedef pack_functor<
3985 typename local_graph_device_type::entries_type::non_const_type,
3986 typename local_inds_dualv_type::t_dev::const_type,
3988 typename local_graph_device_type::row_map_type> inds_packer_type;
3989 inds_packer_type f (ind_d,
3990 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3991 ptr_d, rowPtrsUnpacked_dev_);
3993 typedef typename decltype (ind_d)::execution_space exec_space;
3994 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3995 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3999 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4000 (ptr_d.extent (0) == 0, std::logic_error,
4001 "(\"Optimize Storage\"=true branch) After packing, "
4002 "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
4003 "never allocated.");
4004 if (ptr_d.extent (0) != 0) {
4005 const size_t numOffsets =
static_cast<size_t> (ptr_d.extent (0));
4006 const auto valToCheck =
4007 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4009 (
static_cast<size_t> (valToCheck) != ind_d.extent (0),
4010 std::logic_error,
"(\"Optimize Storage\"=true branch) "
4011 "After packing, ptr_d(" << (numOffsets-1) <<
")="
4012 << valToCheck <<
" != ind_d.extent(0)="
4013 << ind_d.extent(0) <<
".");
4017 setRowPtrsPacked(ptr_d_const);
4018 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
4021 setRowPtrsPacked(rowPtrsUnpacked_dev_);
4022 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
4025 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4026 (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
4027 "(\"Optimize Storage\"=false branch) "
4028 "rowPtrsPacked_dev_.extent(0) = 0. "
4029 "This probably means that "
4030 "k_rowPtrs_ was never allocated.");
4031 if (rowPtrsPacked_dev_.extent (0) != 0) {
4032 const size_t numOffsets =
4033 static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4034 const size_t valToCheck =
4035 rowPtrsPacked_host_(numOffsets - 1);
4036 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4037 (valToCheck !=
size_t(lclIndsPacked_wdv.extent (0)),
4038 std::logic_error,
"(\"Optimize Storage\"=false branch) "
4039 "rowPtrsPacked_dev_(" << (numOffsets-1) <<
")="
4041 <<
" != lclIndsPacked_wdv.extent(0)="
4042 << lclIndsPacked_wdv.extent (0) <<
".");
4048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4049 (
static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
4050 std::logic_error,
"After packing, rowPtrsPacked_dev_.extent(0) = " <<
4051 rowPtrsPacked_dev_.extent (0) <<
" != lclNumRows+1 = " << (lclNumRows+1)
4053 if (rowPtrsPacked_dev_.extent (0) != 0) {
4054 const size_t numOffsets =
static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4055 const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
4056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4057 (
static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
4058 std::logic_error,
"After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
4059 <<
") = " << valToCheck <<
" != lclIndsPacked_wdv.extent(0) = "
4060 << lclIndsPacked_wdv.extent (0) <<
".");
4064 if (requestOptimizedStorage) {
4070 k_numRowEntries_ = row_entries_type ();
4073 setRowPtrsUnpacked(rowPtrsPacked_dev_);
4074 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
4076 storageStatus_ = Details::STORAGE_1D_PACKED;
4079 set_need_sync_host_uvm_access();
4082 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4096 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4097 "Requires matching maps and non-static graph.");
// Column re-indexing routine of CrsGraph.  The method name itself is outside
// this excerpt; presumably reindexColumns, judging by the
// "Tpetra::CrsGraph::lclIndReindexed" view label below -- TODO confirm.
//
// What the visible lines establish:
//  - Takes (among other arguments not shown) an RCP<const import_type>
//    named newImport.
//  - Fails with std::runtime_error if isFillComplete() is true.
//  - Reads the current local indices through a read-only host view and, when
//    indices are allocated and local, checks old-local / global / new-local
//    column indices against the respective OrdinalTraits<>::invalid() values.
//  - gblSuccess[0] == 0 and gblSuccess[1] == 0 are each reported as
//    std::runtime_error inside the non-null-communicator branch.
//  - When the domain Map is non-null and differs from newColMap, importer_ is
//    reset to Teuchos::null.
//
// Fix in this revision: corrected the misspelling "processess" in the second
// error message.
4101 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4105 const Teuchos::RCP<const import_type>&
newImport,
4108 using Teuchos::REDUCE_MIN;
4109 using Teuchos::reduceAll;
4113 typedef typename local_inds_dualv_type::t_host
col_inds_type;
// Precondition: the graph must not be fill complete.
4117 isFillComplete (), std::runtime_error,
"The graph is fill complete "
4118 "(isFillComplete() returns true). You must call resumeFill() before "
4119 "you may call this method.");
4137 const LO
lclNumRows =
static_cast<LO
> (this->getNodeNumRows ());
// Read-only host view of the existing (unpacked) local column indices.
4166 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4171 if (indicesAreAllocated ()) {
4172 if (isLocallyIndexed ()) {
4176 const size_t allocSize = this->getNodeAllocationSize ();
// Each of the three tests below compares an index with the "invalid"
// sentinel of its ordinal type.
4186 if (
oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4195 if (
gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4201 if (
newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4236 for (
size_t k = 0;
k <
rowInfo.numEntries; ++
k) {
// The communicator comes from the row Map; it is null if the row Map is null.
4255 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4256 if (! comm.is_null ()) {
4261 gblSuccess[0] == 0, std::runtime_error,
"It is not possible to continue."
4262 " The most likely reason is that the graph is locally indexed, but the "
4263 "column Map is missing (null) on some processes, due to a previous call "
4264 "to replaceColMap().");
4267 gblSuccess[1] == 0, std::runtime_error,
"On some process, the graph "
4268 "contains column indices that are in the old column Map, but not in the "
4269 "new column Map (on that process). This method does NOT redistribute "
4270 "data; it does not claim to do the work of an Import or Export operation."
4271 " This means that for all processes, the calling process MUST own all "
4272 "column indices, in both the old column Map and the new column Map. In "
4273 "this case, you will need to do an Import or Export operation to "
4274 "redistribute data.");
4277 if (isLocallyIndexed ()) {
4280 Kokkos::view_alloc(
"Tpetra::CrsGraph::lclIndReindexed",
4281 Kokkos::WithoutInitializing),
// After re-indexing, the graph no longer claims sorted indices.
4293 indicesAreSorted_ =
false;
4301 const bool sorted =
false;
4302 const bool merged =
true;
// A domain Map that differs from the new column Map invalidates the Import.
4316 if (! domainMap_.is_null ()) {
4317 if (! domainMap_->isSameAs (*
newColMap)) {
4320 importer_ = Teuchos::null;
// CrsGraph::replaceDomainMap (name taken from the error prefix below; the
// signature line is outside this excerpt).
//
// Visible behaviour:
//  - Reports std::invalid_argument when colMap_ is null, i.e. the graph must
//    already have a column Map.
//  - A second check emits "The new domain Map must be nonnull."; its
//    condition and exception type are not visible here.
//  - Declares a local newImporter that starts out as Teuchos::null.
4329 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4334 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMap: ";
// Precondition: a column Map must exist.
4336 colMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4337 "this method unless the graph already has a column Map.");
4340 prefix <<
"The new domain Map must be nonnull.");
// Starts null; how it is filled in afterwards is not visible in this excerpt.
4343 Teuchos::RCP<const import_type>
newImporter = Teuchos::null;
// CrsGraph::replaceDomainMapAndImporter (name taken from the error prefix).
//
// Parameters visible here: newImporter, an RCP to a const import_type.  The
// other parameter(s) are outside this excerpt.
//
// Visible behaviour:
//  - Reports std::invalid_argument when colMap_ is null.
//  - Emits "The new domain Map must be nonnull." from a check whose condition
//    is not visible here.
//  - Reports std::invalid_argument when colSameAsDom is false; per the
//    message, this is the "new Import is null" case.
//  - For a non-null Import, compares colMap_ with the Import's target Map.
//    Per the message, the new domain Map must also equal the Import's source
//    Map; that comparison is not visible here.
//  - Finally stores newImporter in importer_, casting away const.
4350 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4354 const Teuchos::RCP<const import_type>&
newImporter)
4356 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
// Precondition: a column Map must exist.
4358 colMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4359 "this method unless the graph already has a column Map.");
4362 prefix <<
"The new domain Map must be nonnull.");
4372 (!
colSameAsDom, std::invalid_argument,
"If the new Import is null, "
4373 "then the new domain Map must be the same as the current column Map.");
// Consistency test: current column Map vs. the Import's target Map.
4377 colMap_->isSameAs (* (
newImporter->getTargetMap ()));
4382 "new Import is nonnull, then the current column Map must be the same "
4383 "as the new Import's target Map, and the new domain Map must be the "
4384 "same as the new Import's source Map.");
// importer_ holds a non-const pointer, hence the const cast.
4389 importer_ = Teuchos::rcp_const_cast<import_type> (
newImporter);
// CrsGraph::replaceRangeMap (name taken from the error prefix below; the
// signature line is outside this excerpt).
//
// Visible behaviour:
//  - Reports std::invalid_argument when rowMap_ is null, i.e. the graph must
//    already have a row Map.
//  - A second check emits "The new range Map must be nonnull."; its
//    condition and exception type are not visible here.
//  - Declares a local newExporter that starts out as Teuchos::null.
4392 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4397 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMap: ";
// Precondition: a row Map must exist.
4399 rowMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4400 "this method unless the graph already has a row Map.");
4403 prefix <<
"The new range Map must be nonnull.");
// Starts null; how it is filled in afterwards is not visible in this excerpt.
4406 Teuchos::RCP<const export_type>
newExporter = Teuchos::null;
// CrsGraph::replaceRangeMapAndExporter (name taken from the error prefix).
//
// Parameters visible here: newExporter, an RCP to a const export_type.  The
// other parameter(s) are outside this excerpt.
//
// Visible behaviour:
//  - Reports std::invalid_argument when rowMap_ is null.
//  - Emits a "must be nonnull" message from a check whose condition is not
//    visible here.
//  - Reports std::invalid_argument when rowSameAsRange is false; per the
//    message, this is the "new Export is null" case.
//  - For a non-null Export, compares rowMap_ with the Export's source Map.
//    Per the message, the new range Map must also equal the Export's target
//    Map; that comparison is not visible here.
//  - Finally stores newExporter in exporter_, casting away const.
//
// Fix in this revision: the first two messages were copied from the
// domain-Map variant.  The first check tests rowMap_ but reported a missing
// "column Map"; the second said "domain Map" in a range-Map method.  Both now
// match the wording used by replaceRangeMap.
4413 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4417 const Teuchos::RCP<const export_type>&
newExporter)
4419 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
// Precondition: a row Map must exist.
4421 rowMap_.is_null (), std::invalid_argument,
prefix <<
"You may not call "
4422 "this method unless the graph already has a row Map.");
4425 prefix <<
"The new range Map must be nonnull.");
4435 (!
rowSameAsRange, std::invalid_argument,
"If the new Export is null, "
4436 "then the new range Map must be the same as the current row Map.");
// Consistency test: current row Map vs. the Export's source Map.
4442 rowMap_->isSameAs (* (
newExporter->getSourceMap ()));
4445 "new Export is nonnull, then the current row Map must be the same "
4446 "as the new Export's source Map, and the new range Map must be the "
4447 "same as the new Export's target Map.");
// exporter_ holds a non-const pointer, hence the const cast.
4452 exporter_ = Teuchos::rcp_const_cast<export_type> (
newExporter);
4455#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Accessor compiled only under TPETRA_ENABLE_DEPRECATED_CODE (see the #ifdef
// on the preceding line).  Its own name is outside this excerpt.  The only
// visible statement forwards to getLocalGraphDevice().
4456 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4461 return getLocalGraphDevice();
// Local-graph accessor; presumably getLocalGraphDevice, by analogy with the
// host variant that follows -- TODO confirm, the name line is outside this
// excerpt.  The visible arguments are the device view of the packed local
// column indices (requested with ReadWrite access) and the packed device row
// pointers.
4465 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4471 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4472 rowPtrsPacked_dev_);
// Host-side local-graph accessor (the name line is outside this excerpt).
// Returns a local_graph_host_type built from the host view of the packed
// local column indices (requested with ReadWrite access) and the packed host
// row pointers.
4475 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4480 return local_graph_host_type(
4481 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4482 rowPtrsPacked_host_);
// CrsGraph::computeGlobalConstants (name taken from the profiling-region
// label; the signature is outside this excerpt).
//
// Visible behaviour:
//  - Always calls computeLocalConstants() first.
//  - If haveGlobalConstants_ is false, obtains the communicator, uses this
//    process's getNodeNumEntries() as the local contribution (lcl), stores
//    the result gbl in globalNumEntries_, and receives
//    globalMaxNumRowEntries_ through an outArg.
//  - Sets haveGlobalConstants_ to true.
// The reduction calls themselves fall on lines not shown here.
4485 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4490 using ::Tpetra::Details::ProfilingRegion;
4491 using Teuchos::ArrayView;
4492 using Teuchos::outArg;
4493 using Teuchos::reduceAll;
4496 ProfilingRegion
regionCGC (
"Tpetra::CrsGraph::computeGlobalConstants");
// Local constants are refreshed before any global work.
4498 this->computeLocalConstants ();
4503 if (! this->haveGlobalConstants_) {
4504 const Teuchos::Comm<int>& comm = * (this->getComm ());
// Local entry count converted to GST.
4518 lcl =
static_cast<GST> (this->getNodeNumEntries ());
4521 this->globalNumEntries_ =
gbl;
4525 outArg (this->globalMaxNumRowEntries_));
4526 this->haveGlobalConstants_ =
true;
// CrsGraph::computeLocalConstants (name taken from the profiling-region
// label; the signature is outside this excerpt).
//
// Visible behaviour:
//  - Tests haveLocalConstants_ up front; the body of that branch is not
//    shown (presumably an early return -- TODO confirm).
//  - Resets nodeMaxNumRowEntries_ to OrdinalTraits<size_t>::invalid().
//  - Takes the packed device row pointers and derives a row count of either
//    0 or extent(0) - 1; the selecting condition is not shown.
//  - Calls Details::maxDifference with the label
//    "Tpetra::CrsGraph: nodeMaxNumRowEntries"; the remaining arguments are
//    not shown.
//  - Sets haveLocalConstants_ to true.
4531 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4536 using ::Tpetra::Details::ProfilingRegion;
4538 ProfilingRegion
regionCLC (
"Tpetra::CrsGraph::computeLocalConstants");
4539 if (this->haveLocalConstants_) {
// Placeholder value written before the maximum is recomputed.
4544 this->nodeMaxNumRowEntries_ =
4545 Teuchos::OrdinalTraits<size_t>::invalid();
4549 auto ptr = this->rowPtrsPacked_dev_;
// Row count: 0, or one less than the number of row offsets.
4551 static_cast<LO
> (0) :
4552 (
static_cast<LO
> (ptr.extent(0)) -
static_cast<LO
> (1));
4555 ::Tpetra::Details::maxDifference (
"Tpetra::CrsGraph: nodeMaxNumRowEntries",
4558 this->haveLocalConstants_ =
true;
// CrsGraph::makeIndicesLocal (name taken from the createPrefix call; the
// signature is outside this excerpt).  Returns
// std::pair<size_t, std::string>.
//
// Visible behaviour:
//  - Reports std::logic_error if hasColMap() is false, or if getColMap() is
//    null despite hasColMap() being true.
//  - Does the conversion only when the graph is globally indexed and has at
//    least one local row.
//  - If the unpacked host row pointers are empty, writes an error to errStrm
//    and returns a pair whose first member is OrdinalTraits<size_t>::invalid().
//  - Calls convertColumnIndicesFromGlobalToLocal, overwriting the device
//    local-index view from the read-only device global-index view.
//  - Releases gblInds_wdv by assigning a default-constructed
//    global_inds_wdv_type.
//  - Sets indicesAreLocal_ = true and indicesAreGlobal_ = false, then calls
//    checkInternalState().
// The std::cerr output appears to be verbose-mode tracing; the guarding
// conditions are not shown here.
//
// Fix in this revision: the "row pointers are empty" error message named
// k_rowPtrs_, a member that does not appear in the visible code; it now
// names rowPtrsUnpacked_host_, the view that is actually tested.
4562 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4563 std::pair<size_t, std::string>
4568 using Teuchos::arcp;
4569 using Teuchos::Array;
4574 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4576 typedef typename row_entries_type::non_const_value_type
num_ent_type;
4580 std::unique_ptr<std::string>
prefix;
4582 prefix = this->createPrefix(
"CrsGraph",
"makeIndicesLocal");
4583 std::ostringstream
os;
4584 os << *
prefix <<
"lclNumRows: " << getNodeNumRows() <<
endl;
4585 std::cerr <<
os.str();
// Internal-consistency checks: a column Map must exist and be non-null.
4591 (! this->hasColMap (), std::logic_error,
"The graph does not have a "
4592 "column Map yet. This method should never be called in that case. "
4593 "Please report this bug to the Tpetra developers.");
4595 (this->getColMap ().
is_null (), std::logic_error,
"The graph claims "
4596 "that it has a column Map, because hasColMap() returns true. However, "
4597 "the result of getColMap() is null. This should never happen. Please "
4598 "report this bug to the Tpetra developers.");
4606 const LO
lclNumRows =
static_cast<LO
> (this->getNodeNumRows ());
// Nothing to convert unless the indices are global and rows exist.
4609 if (this->isGloballyIndexed () &&
lclNumRows != 0) {
4611 typename row_entries_type::const_type
h_numRowEnt =
4612 this->k_numRowEntries_;
// Empty row pointers here are an internal error, reported through the
// returned pair instead of an exception.
4615 if (rowPtrsUnpacked_host_.extent (0) == 0) {
4616 errStrm <<
"rowPtrsUnpacked_host_.extent(0) == 0. This should never "
4617 "happen here. Please report this bug to the Tpetra developers."
4620 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4632 using Kokkos::view_alloc;
4633 using Kokkos::WithoutInitializing;
4643 const std::string
label (
"Tpetra::CrsGraph::lclInd");
4645 std::ostringstream
os;
4646 os << *
prefix <<
"(Re)allocate lclInd_wdv: old="
4647 << lclIndsUnpacked_wdv.extent(0) <<
", new=" <<
numEnt <<
endl;
4648 std::cerr <<
os.str();
4665 std::ostringstream
os;
4666 os << *
prefix <<
"Allocate device mirror k_numRowEnt: "
4668 std::cerr <<
os.str();
// Global-to-local conversion: the local view is fully overwritten, the
// global view is only read.
4673 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4676 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4677 gblInds_wdv.getDeviceView(Access::ReadOnly),
4678 rowPtrsUnpacked_dev_,
// Rank lookup for the error text; the null-Map branch body is not shown.
4682 const int myRank = [
this] () {
4683 auto map = this->getMap ();
4684 if (
map.is_null ()) {
4688 auto comm =
map->getComm ();
4689 return comm.is_null () ? 0 : comm->getRank ();
4693 errStrm <<
"(Process " <<
myRank <<
") When converting column "
4694 "indices from global to local, we encountered " <<
lclNumErrs
4697 <<
" not live in the column Map on this process." <<
endl;
4704 std::ostringstream
os;
4705 os << *
prefix <<
"Free gblInds_wdv: "
4706 << gblInds_wdv.extent(0) <<
endl;
4707 std::cerr <<
os.str();
// Drop the global-index storage.
4709 gblInds_wdv = global_inds_wdv_type ();
4712 this->indicesAreLocal_ =
true;
4713 this->indicesAreGlobal_ =
false;
4714 this->checkInternalState ();
4719 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4729 std::unique_ptr<std::string>
prefix;
4732 std::ostringstream
os;
4734 std::cerr <<
os.str();
4742 Teuchos::RCP<const map_type>
colMap = this->colMap_;
4744 this->sortGhostsAssociatedWithEachProcessor_;
4754 using Teuchos::outArg;
4755 using Teuchos::REDUCE_MIN;
4756 using Teuchos::reduceAll;
4762 auto comm = this->getComm ();
4763 if (! comm.is_null ()) {
4769 std::ostringstream
os;
4772 (
true, std::runtime_error,
": An error happened on at "
4773 "least one process in the CrsGraph's communicator. "
4774 "Here are all processes' error messages:" << std::endl
4788 checkInternalState ();
4790 std::ostringstream
os;
4792 std::cerr <<
os.str();
// CrsGraph::sortAndMergeAllIndices (name taken from the string literal near
// the top; the signature is outside this excerpt).  The body uses two
// booleans, `sorted` and `merged`.
//
// Visible behaviour:
//  - Reports std::logic_error if the graph is globally indexed.
//  - Reports std::logic_error if `merged` is false while the graph is
//    storage optimized.
//  - When either flag is false, visits every local row in [0, lclNumRows)
//    through a Kokkos range policy on host_execution_space and calls
//    sortAndMergeRowIndices(getRowInfo(row), sorted, merged).  One variant
//    is a parallel_reduce that sums the per-row return values into
//    totalNumDups and prints it; the other is a plain parallel_for.  The
//    condition choosing between them is not shown (presumably verbose mode
//    -- TODO confirm).
//  - Sets indicesAreSorted_ and noRedundancies_ to true.
4797 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4804 using host_execution_space =
4805 typename Kokkos::View<LO*, device_type>::HostMirror::
4807 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4810 (
"Tpetra::CrsGraph::sortAndMergeAllIndices");
4812 std::unique_ptr<std::string>
prefix;
4815 std::ostringstream
os;
4817 <<
"sorted=" << (
sorted ?
"true" :
"false")
4818 <<
", merged=" << (
merged ?
"true" :
"false") <<
endl;
4819 std::cerr <<
os.str();
// Precondition: the indices must not be global.
4821 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4822 (this->isGloballyIndexed(), std::logic_error,
4823 "This method may only be called after makeIndicesLocal." );
// Precondition: unmerged input is rejected once storage is optimized.
4824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4825 (! merged && this->isStorageOptimized(), std::logic_error,
4826 "The graph is already storage optimized, so we shouldn't be "
4827 "merging any indices. "
4828 "Please report this bug to the Tpetra developers.");
// No per-row work is needed when the rows are already sorted and merged.
4830 if (! sorted || ! merged) {
4831 const LO lclNumRows(this->getNodeNumRows());
4832 auto range = range_type(0, lclNumRows);
// Reduction variant: sums what sortAndMergeRowIndices returns for each row.
4835 size_t totalNumDups = 0;
4836 Kokkos::parallel_reduce(range,
4837 [
this, sorted, merged] (
const LO lclRow,
size_t& numDups)
4839 const RowInfo rowInfo = this->getRowInfo(lclRow);
4840 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4843 std::ostringstream os;
4844 os << *prefix <<
"totalNumDups=" << totalNumDups << endl;
4845 std::cerr << os.str();
// Plain variant: same per-row call, return value discarded.
4848 Kokkos::parallel_for(range,
4849 [
this, sorted, merged] (
const LO lclRow)
4851 const RowInfo rowInfo = this->getRowInfo(lclRow);
4852 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4855 this->indicesAreSorted_ =
true;
4856 this->noRedundancies_ =
true;
4860 std::ostringstream os;
4861 os << *prefix <<
"Done" << endl;
4862 std::cerr << os.str();
// CrsGraph::makeImportExport (name taken from the profiling-region label;
// the signature is outside this excerpt).  The body references a `params`
// object and a `remotePIDs` value declared on lines not shown.
//
// Visible behaviour:
//  - Reports std::logic_error if the graph has no column Map.
//  - If importer_ is null and the domain Map is neither the same object as
//    nor isSameAs() the column Map, builds an Import.  When `params` is null
//    or has no "Import" sublist, the Import is constructed from
//    (domainMap_, colMap_, remotePIDs).
//  - If exporter_ is null and the range Map is neither the same object as
//    nor isSameAs() the row Map, takes the analogous path keyed on the
//    "Export" sublist; the construction itself is not shown.
4866 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4872 using ::Tpetra::Details::ProfilingRegion;
4873 using Teuchos::ParameterList;
4877 ProfilingRegion
regionMIE (
"Tpetra::CrsGraph::makeImportExport");
4880 (! this->hasColMap (), std::logic_error,
4881 "This method may not be called unless the graph has a column Map.");
// An existing Import is left alone.
4891 if (importer_.is_null ()) {
// The pointer comparison comes first, so isSameAs() is skipped for identical objects.
4893 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4894 if (
params.is_null () || !
params->isSublist (
"Import")) {
4896 importer_ =
rcp (
new import_type (domainMap_, colMap_, remotePIDs));
// An existing Export is left alone.
4919 if (exporter_.is_null ()) {
4921 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4922 if (
params.is_null () || !
params->isSublist (
"Export")) {
// One-line textual summary of the graph; presumably the description()
// override, since it begins with dist_object_type::description() -- TODO
// confirm, the signature is outside this excerpt.
//
// Visible output: after the base-class description, a fill-complete graph
// appends status plus global row, column and entry counts; otherwise it
// appends "fill not complete" and the global row count.  The tail of each
// branch is not shown.
4934 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4939 std::ostringstream
oss;
4940 oss << dist_object_type::description ();
4941 if (isFillComplete ()) {
4942 oss <<
"{status = fill complete"
4943 <<
", global rows = " << getGlobalNumRows()
4944 <<
", global cols = " << getGlobalNumCols()
4945 <<
", global num entries = " << getGlobalNumEntries()
4949 oss <<
"{status = fill not complete"
4950 <<
", global rows = " << getGlobalNumRows()
4957 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4961 const Teuchos::EVerbosityLevel
verbLevel)
const
4963 using Teuchos::ArrayView;
4964 using Teuchos::Comm;
4966 using Teuchos::VERB_DEFAULT;
4967 using Teuchos::VERB_NONE;
4968 using Teuchos::VERB_LOW;
4969 using Teuchos::VERB_MEDIUM;
4970 using Teuchos::VERB_HIGH;
4971 using Teuchos::VERB_EXTREME;
4981 for (
size_t dec=10;
dec<getGlobalNumRows();
dec *= 10) {
4984 width = std::max<size_t> (
width,
static_cast<size_t> (11)) + 2;
4985 Teuchos::OSTab
tab (
out);
4994 if (
myImageID == 0)
out << this->description() << std::endl;
4996 if (isFillComplete() &&
myImageID == 0) {
4997 out <<
"Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5002 rowMap_->describe(
out,
vl);
5003 if (colMap_ != Teuchos::null) {
5004 if (
myImageID == 0)
out <<
"\nColumn map: " << std::endl;
5005 colMap_->describe(
out,
vl);
5007 if (domainMap_ != Teuchos::null) {
5008 if (
myImageID == 0)
out <<
"\nDomain map: " << std::endl;
5009 domainMap_->describe(
out,
vl);
5011 if (rangeMap_ != Teuchos::null) {
5012 if (
myImageID == 0)
out <<
"\nRange map: " << std::endl;
5013 rangeMap_->describe(
out,
vl);
5021 <<
"Node number of entries = " << this->getNodeNumEntries () << std::endl
5022 <<
"Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5023 if (! indicesAreAllocated ()) {
5024 out <<
"Indices are not allocated." << std::endl;
5036 out << std::setw(
width) <<
"Node ID"
5037 << std::setw(
width) <<
"Global Row"
5038 << std::setw(
width) <<
"Num Entries";
5053 if (isGloballyIndexed()) {
5054 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
5055 for (
size_t j=0;
j <
rowinfo.numEntries; ++
j){
5060 else if (isLocallyIndexed()) {
5061 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5062 for (
size_t j=0;
j <
rowinfo.numEntries; ++
j) {
5064 out << colMap_->getGlobalElement(
collid) <<
" ";
5080 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5091 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5096 const size_t numSameIDs,
5097 const Kokkos::DualView<
const local_ordinal_type*,
5099 const Kokkos::DualView<
const local_ordinal_type*,
5104 using LO = local_ordinal_type;
5105 using GO = global_ordinal_type;
5108 const bool verbose = verbose_;
5110 std::unique_ptr<std::string>
prefix;
5112 prefix = this->createPrefix(
"CrsGraph",
"copyAndPermute");
5113 std::ostringstream
os;
5115 std::cerr <<
os.str ();
5118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5119 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5120 std::runtime_error,
"permuteToLIDs.extent(0) = "
5121 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
5122 << permuteFromLIDs.extent (0) <<
".");
5126 const row_graph_type& srcRowGraph =
5127 dynamic_cast<const row_graph_type&
> (source);
5130 std::ostringstream os;
5131 os << *prefix <<
"Compute padding" << endl;
5132 std::cerr << os.str ();
5134 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
5135 permuteToLIDs, permuteFromLIDs, verbose);
5136 applyCrsPadding(*padding, verbose);
5141 const this_type* srcCrsGraph =
5142 dynamic_cast<const this_type*
> (&source);
5144 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
5145 const map_type& tgtRowMap = *(getRowMap());
5146 const bool src_filled = srcRowGraph.isFillComplete();
5147 nonconst_global_inds_host_view_type row_copy;
5153 if (src_filled || srcCrsGraph ==
nullptr) {
5155 std::ostringstream os;
5156 os << *prefix <<
"src_filled || srcCrsGraph == nullptr" << endl;
5157 std::cerr << os.str ();
5164 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
5165 const GO gid = srcRowMap.getGlobalElement (myid);
5166 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5167 Kokkos::resize(row_copy,row_length);
5168 size_t check_row_length = 0;
5169 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
5170 this->insertGlobalIndices (gid, row_length, row_copy.data());
5174 std::ostringstream os;
5175 os << *prefix <<
"! src_filled && srcCrsGraph != nullptr" << endl;
5176 std::cerr << os.str ();
5178 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
5179 const GO gid = srcRowMap.getGlobalElement (myid);
5180 global_inds_host_view_type row;
5181 srcCrsGraph->getGlobalRowView (gid, row);
5182 this->insertGlobalIndices (gid, row.extent(0), row.data());
5189 auto permuteToLIDs_h = permuteToLIDs.view_host ();
5190 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5192 if (src_filled || srcCrsGraph ==
nullptr) {
5193 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5194 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5195 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5196 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5197 Kokkos::resize(row_copy,row_length);
5198 size_t check_row_length = 0;
5199 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
5200 this->insertGlobalIndices (mygid, row_length, row_copy.data());
5203 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5204 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5205 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5206 global_inds_host_view_type row;
5207 srcCrsGraph->getGlobalRowView (srcgid, row);
5208 this->insertGlobalIndices (mygid, row.extent(0), row.data());
5213 std::ostringstream os;
5214 os << *prefix <<
"Done" << endl;
5215 std::cerr << os.str ();
5219 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5221 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5222 applyCrsPadding(
const padding_type& padding,
5225 using Details::ProfilingRegion;
5229 using row_ptrs_type =
5230 typename local_graph_device_type::row_map_type::non_const_type;
5231 using range_policy =
5232 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5233 const char tfecfFuncName[] =
"applyCrsPadding";
5234 ProfilingRegion regionCAP(
"Tpetra::CrsGraph::applyCrsPadding");
5236 std::unique_ptr<std::string> prefix;
5238 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5239 std::ostringstream os;
5240 os << *prefix <<
"padding: ";
5243 std::cerr << os.str();
5245 const int myRank = ! verbose ? -1 : [&] () {
5246 auto map = this->getMap();
5247 if (map.is_null()) {
5250 auto comm = map->getComm();
5251 if (comm.is_null()) {
5254 return comm->getRank();
5263 if (! indicesAreAllocated()) {
5265 std::ostringstream os;
5266 os << *prefix <<
"Call allocateIndices" << endl;
5267 std::cerr << os.str();
5269 allocateIndices(GlobalIndices, verbose);
5271 TEUCHOS_ASSERT( indicesAreAllocated() );
5277 std::ostringstream os;
5278 os << *prefix <<
"Allocate row_ptrs_beg: "
5279 << rowPtrsUnpacked_dev_.extent(0) << endl;
5280 std::cerr << os.str();
5282 using Kokkos::view_alloc;
5283 using Kokkos::WithoutInitializing;
5284 row_ptrs_type row_ptrs_beg(
5285 view_alloc(
"row_ptrs_beg", WithoutInitializing),
5286 rowPtrsUnpacked_dev_.extent(0));
5287 Kokkos::deep_copy(row_ptrs_beg, rowPtrsUnpacked_dev_);
5289 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
5290 size_t(row_ptrs_beg.extent(0) - 1);
5292 std::ostringstream os;
5293 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5294 std::cerr << os.str();
5296 row_ptrs_type row_ptrs_end(
5297 view_alloc(
"row_ptrs_end", WithoutInitializing), N);
5298 row_ptrs_type num_row_entries;
5300 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5301 if (refill_num_row_entries) {
5305 row_ptrs_type(view_alloc(
"num_row_entries", WithoutInitializing), N);
5306 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
5307 Kokkos::parallel_for
5308 (
"Fill end row pointers", range_policy(0, N),
5309 KOKKOS_LAMBDA (
const size_t i) {
5310 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5317 Kokkos::parallel_for
5318 (
"Fill end row pointers", range_policy(0, N),
5319 KOKKOS_LAMBDA (
const size_t i) {
5320 row_ptrs_end(i) = row_ptrs_beg(i+1);
5324 if (isGloballyIndexed()) {
5326 padding, myRank, verbose);
5329 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5330 padding, myRank, verbose);
5333 if (refill_num_row_entries) {
5334 Kokkos::parallel_for
5335 (
"Fill num entries", range_policy(0, N),
5336 KOKKOS_LAMBDA (
const size_t i) {
5337 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5339 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5342 std::ostringstream os;
5343 os << *prefix <<
"Reassign k_rowPtrs_; old size: "
5344 << rowPtrsUnpacked_dev_.extent(0) <<
", new size: "
5345 << row_ptrs_beg.extent(0) << endl;
5346 std::cerr << os.str();
5347 TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
5350 setRowPtrsUnpacked(row_ptrs_beg);
5352 set_need_sync_host_uvm_access();
// CrsGraph padding computation for the "same" and "permuted" rows of a
// transfer.  The method name line is outside this excerpt; the prefix label
// reads "computeCrsPadding(same & permute)".
//
// Parameters (as visible):
//   source          - RowGraph supplying the rows.
//   numSameIDs      - passed on to computeCrsPaddingForSameIDs.
//   permuteToLIDs   - DualView of local row indices; passed on to
//                     computeCrsPaddingForPermutedIDs.
//   permuteFromLIDs - DualView of local row indices; its extent(0) is used
//                     as the number of permutes.
//   verbose         - debug-output flag.
//
// Visible behaviour: determines this process's rank (-1 when no communicator
// is available), allocates a padding_type sized by numSameIDs and the number
// of permutes, then fills it through computeCrsPaddingForSameIDs and
// computeCrsPaddingForPermutedIDs.  The return statement is not shown.
5355 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5357 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5359 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5361 const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5362 const size_t numSameIDs,
5363 const Kokkos::DualView<
const local_ordinal_type*,
5364 buffer_device_type>& permuteToLIDs,
5365 const Kokkos::DualView<
const local_ordinal_type*,
5366 buffer_device_type>& permuteFromLIDs,
5367 const bool verbose)
const
5372 std::unique_ptr<std::string> prefix;
5375 "computeCrsPadding(same & permute)");
5376 std::ostringstream os;
5377 os << *prefix <<
"{numSameIDs: " << numSameIDs
5378 <<
", numPermutes: " << permuteFromLIDs.extent(0) <<
"}"
5380 std::cerr << os.str();
// Rank of this process; -1 when the communicator is null.
5383 const int myRank = [&] () {
5384 auto comm = rowMap_.is_null() ? Teuchos::null :
5386 return comm.is_null() ? -1 : comm->getRank();
5388 std::unique_ptr<padding_type> padding(
5389 new padding_type(myRank, numSameIDs,
5390 permuteFromLIDs.extent(0)));
// numSameIDs is narrowed from size_t to LO for the helper.
5392 computeCrsPaddingForSameIDs(*padding, source,
5393 static_cast<LO
>(numSameIDs));
5394 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
// CrsGraph::computeCrsPaddingForSameIDs
//
// Records padding information for the first numSameIDs local rows, which use
// the same local row index in both the source and this (target) graph.
//
// Parameters (as visible):
//   padding    - accumulator; updated through padding.update_same().
//   source     - RowGraph supplying the source rows (declaration partly
//                outside this excerpt).
//   numSameIDs - number of leading local rows to process.
//
// Visible behaviour:
//  - Tests numSameIDs == 0 up front; the branch body is not shown
//    (presumably an early return -- TODO confirm).
//  - The source is treated as duplicate-free only if it is a CrsGraph of
//    this exact type and reports isMerged(); the target uses this->isMerged().
//  - For each row, fetches the global column indices of the source and
//    target rows via getRowGraphGlobalRow (scratch vectors are reused across
//    iterations) and passes pointers, sizes and uniqueness flags to
//    padding.update_same().
// NOTE(review): the debug label is spelled "...SameIds" while the method is
// "...SameIDs"; this only affects the verbose prefix.
5399 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5401 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5402 computeCrsPaddingForSameIDs(
5403 padding_type& padding,
5404 const RowGraph<local_ordinal_type, global_ordinal_type,
5406 const local_ordinal_type numSameIDs)
const
5409 using GO = global_ordinal_type;
5410 using Details::Impl::getRowGraphGlobalRow;
5412 const char tfecfFuncName[] =
"computeCrsPaddingForSameIds";
5414 std::unique_ptr<std::string> prefix;
5415 const bool verbose = verbose_;
5417 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5418 std::ostringstream os;
5419 os << *prefix <<
"numSameIDs: " << numSameIDs << endl;
5420 std::cerr << os.str();
5423 if (numSameIDs == 0) {
5427 const map_type& srcRowMap = *(source.getRowMap());
5428 const map_type& tgtRowMap = *rowMap_;
5429 using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
// A source that is not a CrsGraph of this type is never assumed unique.
5430 const this_type* srcCrs =
dynamic_cast<const this_type*
>(&source);
5431 const bool src_is_unique =
5432 srcCrs ==
nullptr ? false : srcCrs->isMerged();
5433 const bool tgt_is_unique = this->isMerged();
// Scratch buffers handed to getRowGraphGlobalRow on every iteration.
5435 std::vector<GO> srcGblColIndsScratch;
5436 std::vector<GO> tgtGblColIndsScratch;
5438 execute_sync_host_uvm_access();
5439 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
// Same local row index, translated through each graph's own row Map.
5440 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5441 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5442 auto srcGblColInds = getRowGraphGlobalRow(
5443 srcGblColIndsScratch, source, srcGblRowInd);
5444 auto tgtGblColInds = getRowGraphGlobalRow(
5445 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5446 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5447 tgtGblColInds.size(), tgt_is_unique,
5448 srcGblColInds.getRawPtr(),
5449 srcGblColInds.size(), src_is_unique);
5452 std::ostringstream os;
5453 os << *prefix <<
"Done" << endl;
5454 std::cerr << os.str();
// CrsGraph::computeCrsPaddingForPermutedIDs
//
// Records padding information for rows whose local index differs between
// the source graph and this (target) graph.
//
// Parameters (as visible):
//   padding         - accumulator; updated through padding.update_permute().
//   source          - RowGraph supplying the source rows (declaration partly
//                     outside this excerpt).
//   permuteToLIDs   - target local row index for each permutation.
//   permuteFromLIDs - source local row index for each permutation.
//
// Visible behaviour:
//  - Tests permuteToLIDs.extent(0) == 0 up front; the branch body is not
//    shown (presumably an early return -- TODO confirm).
//  - Asserts that neither DualView needs a host sync before reading its
//    host view.
//  - The source is treated as duplicate-free only if it is a CrsGraph of
//    this exact type and reports isMerged(); the target uses this->isMerged().
//  - For each permutation, fetches the global column indices of the source
//    and target rows via getRowGraphGlobalRow and passes them to
//    padding.update_permute().
// NOTE(review): the debug label is spelled "...PermutedIds" while the method
// is "...PermutedIDs"; this only affects the verbose prefix.
5458 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5460 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5461 computeCrsPaddingForPermutedIDs(
5462 padding_type& padding,
5463 const RowGraph<local_ordinal_type, global_ordinal_type,
5465 const Kokkos::DualView<
const local_ordinal_type*,
5466 buffer_device_type>& permuteToLIDs,
5467 const Kokkos::DualView<
const local_ordinal_type*,
5468 buffer_device_type>& permuteFromLIDs)
const
5471 using GO = global_ordinal_type;
5472 using Details::Impl::getRowGraphGlobalRow;
5474 const char tfecfFuncName[] =
"computeCrsPaddingForPermutedIds";
5476 std::unique_ptr<std::string> prefix;
5477 const bool verbose = verbose_;
5479 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5480 std::ostringstream os;
5481 os << *prefix <<
"permuteToLIDs.extent(0): "
5482 << permuteToLIDs.extent(0)
5483 <<
", permuteFromLIDs.extent(0): "
5484 << permuteFromLIDs.extent(0) << endl;
5485 std::cerr << os.str();
5488 if (permuteToLIDs.extent(0) == 0) {
5492 const map_type& srcRowMap = *(source.getRowMap());
5493 const map_type& tgtRowMap = *rowMap_;
5494 using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
// A source that is not a CrsGraph of this type is never assumed unique.
5495 const this_type* srcCrs =
dynamic_cast<const this_type*
>(&source);
5496 const bool src_is_unique =
5497 srcCrs ==
nullptr ? false : srcCrs->isMerged();
5498 const bool tgt_is_unique = this->isMerged();
// The host views are read directly, so they must already be up to date.
5500 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5501 auto permuteToLIDs_h = permuteToLIDs.view_host();
5502 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5503 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
// Scratch buffers handed to getRowGraphGlobalRow on every iteration.
5505 std::vector<GO> srcGblColIndsScratch;
5506 std::vector<GO> tgtGblColIndsScratch;
5507 const LO numPermutes =
static_cast<LO
>(permuteToLIDs_h.extent(0));
5509 execute_sync_host_uvm_access();
5510 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
// Source row: local index -> global index -> global column indices.
5511 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5512 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5513 auto srcGblColInds = getRowGraphGlobalRow(
5514 srcGblColIndsScratch, source, srcGblRowInd);
// Target row: same translation through this graph's row Map.
5515 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5516 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5517 auto tgtGblColInds = getRowGraphGlobalRow(
5518 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5519 padding.update_permute(whichPermute, tgtLclRowInd,
5520 tgtGblColInds.getRawPtr(),
5521 tgtGblColInds.size(), tgt_is_unique,
5522 srcGblColInds.getRawPtr(),
5523 srcGblColInds.size(), src_is_unique);
5527 std::ostringstream os;
5528 os << *prefix <<
"Done" << endl;
5529 std::cerr << os.str();
// CrsGraph::computeCrsPaddingForImports
//
// Builds a padding_type describing the incoming (imported) rows.
//
// Parameters (as visible):
//   importLIDs       - target local row index for each import; must not
//                      need a host sync (asserted).
//   imports          - packed incoming data, read on the host as global
//                      column indices.
//   numPacketsPerLID - entry count for each import.
//   verbose          - debug-output flag.
//
// Visible behaviour:
//  - Syncs `imports` and `numPacketsPerLID` to the host if they need it.
//  - Incoming entries are always treated as possibly containing duplicates
//    (src_is_unique is a constexpr false); the target uses isMerged().
//  - For each import, the source indices begin at imports_h.data() + offset
//    and `offset` then advances by that import's entry count.  The
//    declaration of `offset` is not shown.
//  - Target global column indices come from getRowGraphGlobalRow; both sets
//    are passed to padding->update_import().
// The return statement is not shown.
5533 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5535 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5537 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5538 computeCrsPaddingForImports(
5539 const Kokkos::DualView<
const local_ordinal_type*,
5540 buffer_device_type>& importLIDs,
5541 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5542 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5543 const bool verbose)
const
5545 using Details::Impl::getRowGraphGlobalRow;
5548 using GO = global_ordinal_type;
5549 const char tfecfFuncName[] =
"computeCrsPaddingForImports";
5551 std::unique_ptr<std::string> prefix;
5553 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5554 std::ostringstream os;
5555 os << *prefix <<
"importLIDs.extent(0): "
5556 << importLIDs.extent(0)
5557 <<
", imports.extent(0): "
5558 << imports.extent(0)
5559 <<
", numPacketsPerLID.extent(0): "
5560 << numPacketsPerLID.extent(0) << endl;
5561 std::cerr << os.str();
5564 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// Rank of this process; -1 when the communicator is null.
5565 const int myRank = [&] () {
5566 auto comm = rowMap_.is_null() ? Teuchos::null :
5568 return comm.is_null() ? -1 : comm->getRank();
5570 std::unique_ptr<padding_type> padding(
5571 new padding_type(myRank, numImports));
// Make the host copies current before reading them.
5573 if (imports.need_sync_host()) {
5574 imports.sync_host();
5576 auto imports_h = imports.view_host();
5577 if (numPacketsPerLID.need_sync_host ()) {
5578 numPacketsPerLID.sync_host();
5580 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
// importLIDs is a const DualView, so it is asserted current rather than synced here.
5582 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5583 auto importLIDs_h = importLIDs.view_host();
5585 const map_type& tgtRowMap = *rowMap_;
// Incoming data is never assumed duplicate-free.
5589 constexpr bool src_is_unique =
false;
5590 const bool tgt_is_unique = isMerged();
5592 std::vector<GO> tgtGblColIndsScratch;
5594 execute_sync_host_uvm_access();
5595 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
// Entry count of this import, narrowed from size_t to LO.
5600 const LO origSrcNumEnt =
5601 static_cast<LO
>(numPacketsPerLID_h[whichImport]);
5602 GO*
const srcGblColInds = imports_h.data() + offset;
5604 const LO tgtLclRowInd = importLIDs_h[whichImport];
5605 const GO tgtGblRowInd =
5606 tgtRowMap.getGlobalElement(tgtLclRowInd);
5607 auto tgtGblColInds = getRowGraphGlobalRow(
5608 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5609 const size_t origTgtNumEnt(tgtGblColInds.size());
5611 padding->update_import(whichImport, tgtLclRowInd,
5612 tgtGblColInds.getRawPtr(),
5613 origTgtNumEnt, tgt_is_unique,
5615 origSrcNumEnt, src_is_unique);
// Advance to the start of the next import's indices.
5616 offset += origSrcNumEnt;
5620 std::ostringstream os;
5621 os << *prefix <<
"Done" << endl;
5622 std::cerr << os.str();
5627 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5629 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5631 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5632 computePaddingForCrsMatrixUnpack(
5633 const Kokkos::DualView<
const local_ordinal_type*,
5634 buffer_device_type>& importLIDs,
5635 Kokkos::DualView<char*, buffer_device_type> imports,
5636 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5637 const bool verbose)
const
5639 using Details::Impl::getRowGraphGlobalRow;
5640 using Details::PackTraits;
5643 using GO = global_ordinal_type;
5644 const char tfecfFuncName[] =
"computePaddingForCrsMatrixUnpack";
5646 std::unique_ptr<std::string> prefix;
5648 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5649 std::ostringstream os;
5650 os << *prefix <<
"importLIDs.extent(0): "
5651 << importLIDs.extent(0)
5652 <<
", imports.extent(0): "
5653 << imports.extent(0)
5654 <<
", numPacketsPerLID.extent(0): "
5655 << numPacketsPerLID.extent(0) << endl;
5656 std::cerr << os.str();
5658 const bool extraVerbose =
5661 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
5662 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5663 const int myRank = [&] () {
5664 auto comm = rowMap_.is_null() ? Teuchos::null :
5666 return comm.is_null() ? -1 : comm->getRank();
5668 std::unique_ptr<padding_type> padding(
5669 new padding_type(myRank, numImports));
5671 if (imports.need_sync_host()) {
5672 imports.sync_host();
5674 auto imports_h = imports.view_host();
5675 if (numPacketsPerLID.need_sync_host ()) {
5676 numPacketsPerLID.sync_host();
5678 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5680 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5681 auto importLIDs_h = importLIDs.view_host();
5683 const map_type& tgtRowMap = *rowMap_;
5687 constexpr bool src_is_unique =
false;
5688 const bool tgt_is_unique = isMerged();
5690 std::vector<GO> srcGblColIndsScratch;
5691 std::vector<GO> tgtGblColIndsScratch;
5693 execute_sync_host_uvm_access();
5694 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5699 const size_t numBytes = numPacketsPerLID_h[whichImport];
5701 std::ostringstream os;
5702 os << *prefix <<
"whichImport=" << whichImport
5703 <<
", numImports=" << numImports
5704 <<
", numBytes=" << numBytes << endl;
5705 std::cerr << os.str();
5707 if (numBytes == 0) {
5710 LO origSrcNumEnt = 0;
5711 const size_t numEntBeg = offset;
5712 const size_t numEntLen =
5713 PackTraits<LO>::packValueCount(origSrcNumEnt);
5714 TEUCHOS_ASSERT( numBytes >= numEntLen );
5715 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5716 PackTraits<LO>::unpackValue(origSrcNumEnt,
5717 imports_h.data() + numEntBeg);
5719 std::ostringstream os;
5720 os << *prefix <<
"whichImport=" << whichImport
5721 <<
", numImports=" << numImports
5722 <<
", origSrcNumEnt=" << origSrcNumEnt << endl;
5723 std::cerr << os.str();
5725 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5726 TEUCHOS_ASSERT( numBytes >=
size_t(numEntLen + origSrcNumEnt *
sizeof(GO)) );
5727 const size_t gidsBeg = numEntBeg + numEntLen;
5728 if (srcGblColIndsScratch.size() <
size_t(origSrcNumEnt)) {
5729 srcGblColIndsScratch.resize(origSrcNumEnt);
5731 GO*
const srcGblColInds = srcGblColIndsScratch.data();
5732 PackTraits<GO>::unpackArray(srcGblColInds,
5733 imports_h.data() + gidsBeg,
5735 const LO tgtLclRowInd = importLIDs_h[whichImport];
5736 const GO tgtGblRowInd =
5737 tgtRowMap.getGlobalElement(tgtLclRowInd);
5738 auto tgtGblColInds = getRowGraphGlobalRow(
5739 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5740 const size_t origNumTgtEnt(tgtGblColInds.size());
5743 std::ostringstream os;
5744 os << *prefix <<
"whichImport=" << whichImport
5745 <<
", numImports=" << numImports
5746 <<
": Call padding->update_import" << endl;
5747 std::cerr << os.str();
5749 padding->update_import(whichImport, tgtLclRowInd,
5750 tgtGblColInds.getRawPtr(),
5751 origNumTgtEnt, tgt_is_unique,
5753 origSrcNumEnt, src_is_unique);
5758 std::ostringstream os;
5759 os << *prefix <<
"Done" << endl;
5760 std::cerr << os.str();
// packAndPrepare: pack the source rows listed in exportLIDs into `exports`
// and record a per-row packet count in numPacketsPerLID.
//
// Three packing paths are visible below:
//   1. `source` is a RowGraph but not a CrsGraph: delegate to
//      RowGraph::pack through host ArrayViews, then copy the packed data
//      into the host view of `exports`.
//   2. getColMap() is non-null and (rowPtrsPacked_dev_ is nonempty or the
//      row Map has zero local elements): packCrsGraphNew.
//   3. Otherwise: srcCrsGraphPtr->packFillActiveNew.
//
// Raises std::invalid_argument if `source` is not a RowGraph with matching
// template parameters, and std::runtime_error if this graph is fill
// complete. A mismatch between exportLIDs.extent(0) and
// numPacketsPerLID.extent(0) is also rejected.
5765 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5767 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5769 (
const SrcDistObject& source,
5770 const Kokkos::DualView<
const local_ordinal_type*,
5771 buffer_device_type>& exportLIDs,
5772 Kokkos::DualView<packet_type*,
5773 buffer_device_type>& exports,
5774 Kokkos::DualView<
size_t*,
5775 buffer_device_type> numPacketsPerLID,
5776 size_t& constantNumPackets)
5779 using GO = global_ordinal_type;
5781 using crs_graph_type =
5782 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5783 const char tfecfFuncName[] =
"packAndPrepare: ";
5784 ProfilingRegion region_papn (
"Tpetra::CrsGraph::packAndPrepare");
5786 const bool verbose = verbose_;
5787 std::unique_ptr<std::string> prefix;
// Diagnostic output: build the message prefix and announce the start.
5789 prefix = this->
createPrefix(
"CrsGraph",
"packAndPrepare");
5790 std::ostringstream os;
5791 os << *prefix <<
"Start" << endl;
5792 std::cerr << os.str();
// One packet count is produced per export LID, so the extents must agree.
5795 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5796 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5798 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5799 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
// The source must at least be a RowGraph; a failed dynamic_cast is an error.
5801 const row_graph_type* srcRowGraphPtr =
5802 dynamic_cast<const row_graph_type*
> (&source);
5803 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5804 (srcRowGraphPtr ==
nullptr, std::invalid_argument,
"Source of an Export "
5805 "or Import operation to a CrsGraph must be a RowGraph with the same "
5806 "template parameters.");
5810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5811 (this->isFillComplete (), std::runtime_error,
5812 "The target graph of an Import or Export must not be fill complete.");
// A second cast decides whether the CrsGraph-specific paths are available.
5814 const crs_graph_type* srcCrsGraphPtr =
5815 dynamic_cast<const crs_graph_type*
> (&source);
// Path 1: generic RowGraph source.
5817 if (srcCrsGraphPtr ==
nullptr) {
5818 using Teuchos::ArrayView;
5822 std::ostringstream os;
5823 os << *prefix <<
"Source is a RowGraph but not a CrsGraph"
5825 std::cerr << os.str();
// RowGraph::pack takes Teuchos ArrayViews, so wrap the host views.
// exportLIDs is required to be current on host already.
5832 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5833 auto exportLIDs_h = exportLIDs.view_host ();
5834 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5835 exportLIDs_h.extent (0));
5836 Teuchos::Array<GO> exports_a;
// numPacketsPerLID is overwritten on host, so reset its sync state and
// mark the host side as modified before handing it out.
5838 numPacketsPerLID.clear_sync_state ();
5839 numPacketsPerLID.modify_host ();
5840 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5841 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5842 numPacketsPerLID_h.extent (0));
5843 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5844 constantNumPackets);
// Reallocate `exports` only when its length differs from the packed size.
5845 const size_t newSize =
static_cast<size_t> (exports_a.size ());
5846 if (
static_cast<size_t> (exports.extent (0)) != newSize) {
5847 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5848 exports = exports_dv_type (
"exports", newSize);
// Wrap the Teuchos::Array storage in an unmanaged host View so it can be
// deep-copied into the host side of `exports`.
5850 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
5851 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5852 exports.clear_sync_state ();
5853 exports.modify_host ();
5854 Kokkos::deep_copy (exports.view_host (), exports_a_h);
// Path 2: packCrsGraphNew.
// NOTE(review): getColMap(), rowPtrsPacked_dev_ and getRowMap() are
// unqualified here, i.e. they query this (target) graph, while the packing
// call below reads *srcCrsGraphPtr -- confirm that is intended.
5857 else if (! getColMap ().is_null () &&
5858 (rowPtrsPacked_dev_.extent (0) != 0 ||
5859 getRowMap ()->getNodeNumElements () == 0)) {
5861 std::ostringstream os;
5862 os << *prefix <<
"packCrsGraphNew path" << endl;
5863 std::cerr << os.str();
// exportPIDs is default-constructed (empty) and passed through as is.
5865 using export_pids_type =
5866 Kokkos::DualView<const int*, buffer_device_type>;
5867 export_pids_type exportPIDs;
5869 using NT = node_type;
5871 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5872 exports, numPacketsPerLID,
5873 constantNumPackets,
false);
// Path 3: let the source CrsGraph pack itself.
5876 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5877 constantNumPackets);
5881 std::ostringstream os;
5882 os << *prefix <<
"Done" << endl;
5883 std::cerr << os.str();
// NOTE(review): only a fragment of this definition is visible (the name and
// most of the signature/body lines are missing from this excerpt);
// presumably this is the Teuchos::Array-based pack entry point -- confirm.
// The visible condition matches the one packAndPrepare uses to choose
// its packCrsGraphNew path: a non-null column Map, and either nonempty
// rowPtrsPacked_dev_ or a row Map with zero local elements.
5887 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5891 Teuchos::Array<GlobalOrdinal>& exports,
5895 auto col_map = this->getColMap();
5897 if( !
col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
// Packs the global column indices of the rows listed in exportLIDs into the
// Teuchos::Array `exports`, in two host-parallel passes:
//   1. a parallel_reduce that stores each row's entry count in
//      numPacketsPerLID and sums the counts into totalNumPackets;
//   2. a parallel_scan that carries the running offset into `exports` and
//      copies each row's column indices (converted to global indices via
//      the column Map when the graph is locally indexed).
// constantNumPackets is set to 0. Rows whose local index is not in the row
// Map, or whose data would overrun `exports`, are counted in errCount and
// reported afterwards as std::logic_error. A size mismatch between
// exportLIDs and numPacketsPerLID raises std::runtime_error.
//
// NOTE(review): the function-name line is missing from this excerpt; the
// body parallels packFillActiveNew, so this is taken to be packFillActive.
5908 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5912 Teuchos::Array<GlobalOrdinal>& exports,
5919 using host_execution_space =
5920 typename Kokkos::View<size_t*, device_type>::
5921 HostMirror::execution_space;
5923 const bool verbose = verbose_;
5926 std::unique_ptr<std::string>
prefix;
// Fix: the verbose prefix previously carried the label "allocateIndices"
// (copied from another method), which mislabelled this function's output.
5928 prefix = this->createPrefix(
"CrsGraph",
"packFillActive");
5929 std::ostringstream
os;
5931 std::cerr <<
os.str();
5933 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5934 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5935 "exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size()"
5936 " = " << numPacketsPerLID.size () <<
".");
5938 const map_type&
rowMap = * (this->getRowMap ());
5939 const map_type*
const colMapPtr = this->colMap_.getRawPtr ();
// A locally indexed graph needs its column Map to translate indices below.
5940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5941 (this->isLocallyIndexed () && colMapPtr ==
nullptr, std::logic_error,
5942 "This graph claims to be locally indexed, but its column Map is nullptr. "
5943 "This should never happen. Please report this bug to the Tpetra "
5947 constantNumPackets = 0;
// Raw pointers are what the by-value lambda captures below actually copy.
5951 size_t*
const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5952 const LO*
const exportLIDs_raw = exportLIDs.getRawPtr ();
5959 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5960 size_t totalNumPackets = 0;
5961 size_t errCount = 0;
5964 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
// errCountView wraps the address of errCount so the lambdas can bump the
// error counter atomically.
5966 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5967 constexpr size_t ONE = 1;
5969 execute_sync_host_uvm_access();
// Pass 1: per-row counts and their total.
5970 Kokkos::parallel_reduce (
"Tpetra::CrsGraph::pack: totalNumPackets",
5972 [=] (
const LO& i,
size_t& curTotalNumPackets) {
5973 const GO gblRow =
rowMap.getGlobalElement (exportLIDs_raw[i]);
5974 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5975 Kokkos::atomic_add (&errCountView(), ONE);
5976 numPacketsPerLID_raw[i] = 0;
5979 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5980 numPacketsPerLID_raw[i] = numEnt;
5981 curTotalNumPackets += numEnt;
5987 std::ostringstream os;
5988 os << *prefix <<
"totalNumPackets=" << totalNumPackets << endl;
5989 std::cerr << os.str();
5991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5992 (errCount != 0, std::logic_error,
"totalNumPackets count encountered "
5993 "one or more errors! errCount = " << errCount
5994 <<
", totalNumPackets = " << totalNumPackets <<
".");
// Size the output buffer to hold exactly the counted entries.
5998 exports.resize (totalNumPackets);
6000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6001 (! this->supportsRowViews (), std::logic_error,
6002 "this->supportsRowViews() returns false; this should never happen. "
6003 "Please report this bug to the Tpetra developers.");
6009 std::ostringstream os;
6010 os << *prefix <<
"Pack into exports" << endl;
6011 std::cerr << os.str();
6016 GO*
const exports_raw = exports.getRawPtr ();
// Pass 2: the scan value (exportsOffset) is the row's start offset in
// exports_raw; prefix is captured by reference for the diagnostics.
6018 Kokkos::parallel_scan (
"Tpetra::CrsGraph::pack: pack from views",
6019 inputRange, [=, &prefix]
6020 (
const LO i,
size_t& exportsOffset,
const bool final) {
6021 const size_t curOffset = exportsOffset;
6022 const GO gblRow =
rowMap.getGlobalElement (exportLIDs_raw[i]);
6023 const RowInfo rowInfo =
6024 this->getRowInfoFromGlobalRowIndex (gblRow);
6026 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
6027 if (rowInfo.localRow == TDO::invalid ()) {
6029 std::ostringstream os;
6030 os << *prefix <<
": INVALID rowInfo: i=" << i
6031 <<
", lclRow=" << exportLIDs_raw[i] << endl;
6032 std::cerr << os.str();
6034 Kokkos::atomic_add (&errCountView(), ONE);
// Guard against writing past the end of the exports buffer.
6036 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6038 std::ostringstream os;
6039 os << *prefix <<
": UH OH! For i=" << i <<
", lclRow="
6040 << exportLIDs_raw[i] <<
", gblRow=" << gblRow <<
", curOffset "
6041 "(= " << curOffset <<
") + numEnt (= " << rowInfo.numEntries
6042 <<
") > totalNumPackets (= " << totalNumPackets <<
")."
6044 std::cerr << os.str();
6046 Kokkos::atomic_add (&errCountView(), ONE);
6049 const LO numEnt =
static_cast<LO
> (rowInfo.numEntries);
// Locally indexed: translate each local column index to a global one.
6050 if (this->isLocallyIndexed ()) {
6051 auto lclColInds = getLocalIndsViewHost (rowInfo);
6053 for (LO k = 0; k < numEnt; ++k) {
6054 const LO lclColInd = lclColInds(k);
6055 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6059 exports_raw[curOffset + k] = gblColInd;
6062 exportsOffset = curOffset + numEnt;
// Globally indexed: copy the stored global column indices directly.
6064 else if (this->isGloballyIndexed ()) {
6065 auto gblColInds = getGlobalIndsViewHost (rowInfo);
6067 for (LO k = 0; k < numEnt; ++k) {
6068 const GO gblColInd = gblColInds(k);
6072 exports_raw[curOffset + k] = gblColInd;
6075 exportsOffset = curOffset + numEnt;
6083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6084 (errCount != 0, std::logic_error,
"Packing encountered "
6085 "one or more errors! errCount = " << errCount
6086 <<
", totalNumPackets = " << totalNumPackets <<
".");
6089 std::ostringstream os;
6090 os << *prefix <<
"Done" << endl;
6091 std::cerr << os.str();
// packFillActiveNew: DualView-based packing of the rows listed in
// exportLIDs into `exports`.
//
// Works on the host views of its arguments in two host-parallel passes:
//   1. parallel_reduce: store each row's entry count in numPacketsPerLID
//      and sum the counts into totalNumPackets;
//   2. parallel_scan: carry the running offset into `exports` and copy each
//      row's column indices (converted to global indices through the column
//      Map when the graph is locally indexed).
// constantNumPackets is set to 0. `exports` is reallocated only when it is
// shorter than totalNumPackets. Errors found in pass 1 are reported as
// std::logic_error; a mismatch between the exportLIDs and numPacketsPerLID
// extents raises std::runtime_error.
6095 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6097 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6098 packFillActiveNew (
const Kokkos::DualView<
const local_ordinal_type*,
6099 buffer_device_type>& exportLIDs,
6100 Kokkos::DualView<packet_type*,
6101 buffer_device_type>& exports,
6102 Kokkos::DualView<
size_t*,
6103 buffer_device_type> numPacketsPerLID,
6104 size_t& constantNumPackets)
const
6108 using GO = global_ordinal_type;
6109 using host_execution_space =
typename Kokkos::View<
size_t*,
6110 device_type>::HostMirror::execution_space;
6111 using host_device_type =
6112 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6113 using exports_dv_type =
6114 Kokkos::DualView<packet_type*, buffer_device_type>;
6115 const char tfecfFuncName[] =
"packFillActiveNew: ";
6116 const bool verbose = verbose_;
6118 const auto numExportLIDs = exportLIDs.extent (0);
6119 std::unique_ptr<std::string> prefix;
6121 prefix = this->
createPrefix(
"CrsGraph",
"packFillActiveNew");
6122 std::ostringstream os;
6123 os << *prefix <<
"numExportLIDs: " << numExportLIDs
6124 <<
", numPacketsPerLID.extent(0): "
6125 << numPacketsPerLID.extent(0) << endl;
6126 std::cerr << os.str();
6128 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6129 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6130 "exportLIDs.extent(0) = " << numExportLIDs
6131 <<
" != numPacketsPerLID.extent(0) = "
6132 << numPacketsPerLID.extent (0) <<
".");
// exportLIDs is read on host and must already be current there.
6133 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6134 auto exportLIDs_h = exportLIDs.view_host ();
6136 const map_type&
rowMap = * (this->getRowMap ());
6137 const map_type*
const colMapPtr = this->colMap_.getRawPtr ();
// A locally indexed graph needs its column Map to translate indices below.
6138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6139 (this->isLocallyIndexed () && colMapPtr ==
nullptr, std::logic_error,
6140 "This graph claims to be locally indexed, but its column Map is nullptr. "
6141 "This should never happen. Please report this bug to the Tpetra "
6145 constantNumPackets = 0;
// numPacketsPerLID is overwritten on host: reset its sync state and mark
// the host side modified before taking the host view.
6147 numPacketsPerLID.clear_sync_state ();
6148 numPacketsPerLID.modify_host ();
6149 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6156 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6157 range_type inputRange (0, numExportLIDs);
6158 size_t totalNumPackets = 0;
6159 size_t errCount = 0;
// errCountView wraps the address of errCount so the lambdas can bump the
// error counter atomically.
6162 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6163 constexpr size_t ONE = 1;
6166 std::ostringstream os;
6167 os << *prefix <<
"Compute totalNumPackets" << endl;
6168 std::cerr << os.str ();
6171 execute_sync_host_uvm_access();
// Pass 1: per-row counts and their total.
6172 Kokkos::parallel_reduce
6173 (
"Tpetra::CrsGraph::pack: totalNumPackets",
6175 [=, &prefix] (
const LO i,
size_t& curTotalNumPackets) {
6176 const LO lclRow = exportLIDs_h[i];
6177 const GO gblRow =
rowMap.getGlobalElement (lclRow);
// A local row that is not in the row Map counts as an error and
// contributes zero packets.
6178 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6180 std::ostringstream os;
6181 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
6182 <<
" not in row Map on this process" << endl;
6183 std::cerr << os.str();
6185 Kokkos::atomic_add (&errCountView(), ONE);
6186 numPacketsPerLID_h(i) = 0;
6189 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6190 numPacketsPerLID_h(i) = numEnt;
6191 curTotalNumPackets += numEnt;
6197 std::ostringstream os;
6198 os << *prefix <<
"totalNumPackets: " << totalNumPackets
6199 <<
", errCount: " << errCount << endl;
6200 std::cerr << os.str ();
6202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6203 (errCount != 0, std::logic_error,
"totalNumPackets count encountered "
6204 "one or more errors! totalNumPackets: " << totalNumPackets
6205 <<
", errCount: " << errCount <<
".");
// Grow the exports buffer only if it is too small for the packed data.
6208 if (
size_t(exports.extent (0)) < totalNumPackets) {
6210 exports = exports_dv_type (
"exports", totalNumPackets);
6213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6214 (! this->supportsRowViews (), std::logic_error,
6215 "this->supportsRowViews() returns false; this should never happen. "
6216 "Please report this bug to the Tpetra developers.");
6222 std::ostringstream os;
6223 os << *prefix <<
"Pack into exports buffer" << endl;
6224 std::cerr << os.str();
// The packed data is written on host.
6227 exports.clear_sync_state ();
6228 exports.modify_host ();
6229 auto exports_h = exports.view_host ();
// Pass 2: the scan value (exportsOffset) is the row's start offset in
// exports_h; prefix is captured by reference for the diagnostics.
6232 Kokkos::parallel_scan
6233 (
"Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6234 inputRange, [=, &prefix]
6235 (
const LO i,
size_t& exportsOffset,
const bool final) {
6236 const size_t curOffset = exportsOffset;
6237 const LO lclRow = exportLIDs_h(i);
6238 const GO gblRow =
rowMap.getGlobalElement (lclRow);
6239 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6241 std::ostringstream os;
6242 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
6243 <<
" not in row Map on this process" << endl;
6244 std::cerr << os.str();
6246 Kokkos::atomic_add (&errCountView(), ONE);
6250 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6251 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6253 std::ostringstream os;
6254 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
6255 <<
", gblRow=" << gblRow <<
": invalid rowInfo"
6257 std::cerr << os.str();
6259 Kokkos::atomic_add (&errCountView(), ONE);
// Guard against writing past the counted total.
6263 if (curOffset + rowInfo.numEntries > totalNumPackets) {
6265 std::ostringstream os;
6266 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
6267 <<
", gblRow=" << gblRow <<
", curOffset (= "
6268 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
6269 <<
") > totalNumPackets (= " << totalNumPackets
6271 std::cerr << os.str();
6273 Kokkos::atomic_add (&errCountView(), ONE);
6277 const LO numEnt =
static_cast<LO
> (rowInfo.numEntries);
// Locally indexed: translate each local column index to a global one.
6278 if (this->isLocallyIndexed ()) {
6279 auto lclColInds = getLocalIndsViewHost(rowInfo);
6281 for (LO k = 0; k < numEnt; ++k) {
6282 const LO lclColInd = lclColInds(k);
6283 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6287 exports_h(curOffset + k) = gblColInd;
6290 exportsOffset = curOffset + numEnt;
// Globally indexed: copy the stored global column indices directly.
6292 else if (this->isGloballyIndexed ()) {
6293 auto gblColInds = getGlobalIndsViewHost(rowInfo);
6295 for (LO k = 0; k < numEnt; ++k) {
6296 const GO gblColInd = gblColInds(k);
6300 exports_h(curOffset + k) = gblColInd;
6303 exportsOffset = curOffset + numEnt;
6316 std::ostringstream os;
6317 os << *prefix <<
"errCount=" << errCount <<
"; Done" << endl;
6318 std::cerr << os.str();
// unpackAndCombine: insert the received column indices in `imports` into
// the rows named by importLIDs.
//
// Steps visible below:
//   1. compute padding for the incoming rows (computeCrsPaddingForImports)
//      and apply it (applyCrsPadding);
//   2. validate arguments: the importLIDs / numPacketsPerLID extents must
//      match and the graph must not be fill complete (std::runtime_error);
//   3. bring numPacketsPerLID and imports up to date on host if needed;
//   4. walk the import rows in order, consuming numPacketsPerLID_h[i]
//      entries of imports_h per row, and insert them either as global
//      indices (insertGlobalIndicesFiltered) or, for a locally indexed
//      graph, as local indices converted through colMap_.
// A std::exception caught by the visible handler is rethrown as
// std::runtime_error with added context.
6322 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6324 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6326 (
const Kokkos::DualView<
const local_ordinal_type*,
6327 buffer_device_type>& importLIDs,
6328 Kokkos::DualView<packet_type*,
6329 buffer_device_type> imports,
6330 Kokkos::DualView<
size_t*,
6331 buffer_device_type> numPacketsPerLID,
6335 using Details::ProfilingRegion;
6338 using GO = global_ordinal_type;
// No trailing ": " here; the exception messages below start with ": ".
6339 const char tfecfFuncName[] =
"unpackAndCombine";
6341 ProfilingRegion regionCGC(
"Tpetra::CrsGraph::unpackAndCombine");
6342 const bool verbose = verbose_;
6344 std::unique_ptr<std::string> prefix;
6346 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
6347 std::ostringstream os;
6348 os << *prefix <<
"Start" << endl;
6349 std::cerr << os.str ();
// Compute and apply row padding before any indices are inserted.
6352 auto padding = computeCrsPaddingForImports(
6353 importLIDs, imports, numPacketsPerLID, verbose);
6354 applyCrsPadding(*padding, verbose);
6356 std::ostringstream os;
6357 os << *prefix <<
"Done computing & applying padding" << endl;
6358 std::cerr << os.str ();
6379 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6380 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6381 std::runtime_error,
": importLIDs.extent(0) = "
6382 << importLIDs.extent (0) <<
" != numPacketsPerLID.extent(0) = "
6383 << numPacketsPerLID.extent (0) <<
".");
6384 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6385 (isFillComplete (), std::runtime_error,
6386 ": Import or Export operations are not allowed on a target "
6387 "CrsGraph that is fillComplete.");
6389 const size_t numImportLIDs(importLIDs.extent(0));
// The unpack loop runs on host: sync the two writable buffers if stale;
// importLIDs is required to be current on host already.
6390 if (numPacketsPerLID.need_sync_host()) {
6391 numPacketsPerLID.sync_host();
6393 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6394 if (imports.need_sync_host()) {
6395 imports.sync_host();
6397 auto imports_h = imports.view_host();
6398 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6399 auto importLIDs_h = importLIDs.view_host();
// Scratch space for converted local column indices, sized once to the
// largest per-row packet count so the loop below need not resize it.
6402 Teuchos::Array<LO> lclColInds;
6403 if (isLocallyIndexed()) {
6405 std::ostringstream os;
6406 os << *prefix <<
"Preallocate local indices scratch" << endl;
6407 std::cerr << os.str();
6409 size_t maxNumInserts = 0;
6410 for (
size_t i = 0; i < numImportLIDs; ++i) {
6411 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6414 std::ostringstream os;
6415 os << *prefix <<
"Local indices scratch size: "
6416 << maxNumInserts << endl;
6417 std::cerr << os.str();
6419 lclColInds.resize (maxNumInserts);
6423 std::ostringstream os;
6425 if (isGloballyIndexed()) {
6426 os <<
"Graph is globally indexed";
6429 os <<
"Graph is neither locally nor globally indexed";
6432 std::cerr << os.str();
6436 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6437 const map_type&
rowMap = *rowMap_;
// importsOffset is the start of the current row's data within imports_h.
6440 size_t importsOffset = 0;
6441 for (
size_t i = 0; i < numImportLIDs; ++i) {
6443 std::ostringstream os;
6444 os << *prefix <<
"i=" << i <<
", numImportLIDs="
6445 << numImportLIDs << endl;
6446 std::cerr << os.str();
6450 const LO lclRow = importLIDs_h[i];
6451 const GO gblRow =
rowMap.getGlobalElement(lclRow);
6452 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6453 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6454 std::logic_error,
"importLIDs[i=" << i <<
"]="
6455 << lclRow <<
" is not in the row Map on the calling "
// NOTE(review): the size_t packet count is narrowed to LO here; assumes a
// row's entry count always fits in LO -- confirm.
6457 const LO numEnt = numPacketsPerLID_h[i];
6458 const GO*
const gblColInds = (numEnt == 0) ?
nullptr :
6459 imports_h.data() + importsOffset;
6460 if (! isLocallyIndexed()) {
6461 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
// Locally indexed graph: convert incoming global indices through the
// column Map, then insert them as local indices.
6466 for (LO j = 0; j < numEnt; j++) {
6467 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6469 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6471 importsOffset += numEnt;
// Wrap any std::exception in a runtime_error that names this method.
6474 catch (std::exception& e) {
6475 TEUCHOS_TEST_FOR_EXCEPTION
6476 (
true, std::runtime_error,
6477 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6478 "exception: " << endl << e.what());
6482 std::ostringstream os;
6483 os << *prefix <<
"Done" << endl;
6484 std::cerr << os.str();
// NOTE(review): the name, signature and most body lines of this definition
// are missing from this excerpt; presumably this is
// removeEmptyProcessesInPlace -- confirm against the full file.
// The visible skeleton inspects the domain, range and column Maps in turn,
// testing whether the domain and range Maps are the very same object as
// the row Map (raw-pointer equality), and later checks whether `params`
// is null or lacks an "Export" / "Import" sublist.
6488 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6493 using Teuchos::Comm;
6494 using Teuchos::null;
6495 using Teuchos::ParameterList;
// Domain Map: pointer comparison against the row Map.
6509 if (! domainMap_.is_null ()) {
6510 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
// Range Map: same pointer comparison.
6520 if (! rangeMap_.is_null ()) {
6521 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6531 if (! colMap_.is_null ()) {
// The remaining visible conditions test for missing "Export" / "Import"
// parameter sublists.
6545 if (! rangeMap_.is_null () &&
6548 if (
params.is_null () || !
params->isSublist (
"Export")) {
6557 if (! domainMap_.is_null () &&
6560 if (
params.is_null () || !
params->isSublist (
"Import")) {
// getLocalDiagOffsets (Kokkos::View overload): fills `offsets` with
// per-row diagonal offset information for the local rows.
// NOTE(review): the computation itself is mostly missing from this
// excerpt; the summary is inferred from the name and the error messages.
//
// Preconditions visible below: the graph must have a column Map
// (std::runtime_error otherwise) and offsets.extent(0) must be at least
// getNodeNumRows() (std::invalid_argument otherwise).
// The trailing section assembles a cross-process error report covering a
// column Map that lacks a diagonal entry, rows without a diagonal entry,
// incorrect offsets, and other errors.
6586 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6589 getLocalDiagOffsets (
const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets)
const
6595 const bool verbose = verbose_;
6597 std::unique_ptr<std::string>
prefix;
6599 prefix = this->createPrefix(
"CrsGraph",
"getLocalDiagOffsets");
6600 std::ostringstream
os;
6601 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6603 std::cerr <<
os.str();
6607 (! hasColMap (), std::runtime_error,
"The graph must have a column Map.");
6608 const LO
lclNumRows =
static_cast<LO
> (this->getNodeNumRows ());
6610 (
static_cast<LO
> (offsets.extent (0)) <
lclNumRows,
6611 std::invalid_argument,
"offsets.extent(0) = " <<
6612 offsets.extent (0) <<
" < getNodeNumRows() = " <<
lclNumRows <<
".");
// Two visible strategies: with a fill-complete graph the device-side local
// graph is fetched; the other path works on a host mirror of `offsets`.
6637 const bool sorted = this->isSorted ();
6638 if (isFillComplete ()) {
6639 auto lclGraph = this->getLocalGraphDevice ();
6648 auto offsets_h = Kokkos::create_mirror_view (offsets);
6658 if (
lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6668 const size_t hint = 0;
6679 typename local_inds_dualv_type::t_host::const_type
lclColInds;
// Diagnostic dump of offsets that failed verification.
6716 std::ostringstream
os;
6717 os << *
prefix <<
"Wrong offsets: [";
6726 std::cerr <<
os.str();
6730 using Teuchos::reduceAll;
6732 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
// Error report; gblResults[4] is printed as the reporting process.
6756 std::ostringstream
os;
6757 os <<
"Issue(s) that we noticed (on Process " <<
gblResults[4] <<
", "
6758 "possibly among others): " <<
endl;
6760 os <<
" - The column Map does not contain at least one diagonal entry "
6761 "of the graph." <<
endl;
6764 os <<
" - On one or more processes, some row does not contain a "
6765 "diagonal entry." <<
endl;
6768 os <<
" - On one or more processes, some offsets are incorrect."
6772 os <<
" - One or more processes had some other error."
// getLocalOffRankOffsets: hands back the cached k_offRankOffsets_ view
// through `offsets`, recomputing it when the cache is stale.
//
// The cache is reused only when haveLocalOffRankOffsets_ is set and the
// cached view has lclNumRows+1 entries. Otherwise the flag is cleared, the
// view is reallocated without initialization, and, for a fill-complete
// graph, it is filled by Details::getGraphOffRankOffsets before the flag is
// set again. Requires a column Map (std::runtime_error otherwise).
6780 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6787 const bool verbose = verbose_;
6789 std::unique_ptr<std::string>
prefix;
6791 prefix = this->createPrefix(
"CrsGraph",
"getLocalOffRankOffsets");
6792 std::ostringstream
os;
6793 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6795 std::cerr <<
os.str();
6799 (! hasColMap (), std::runtime_error,
"The graph must have a column Map.");
6802 const size_t lclNumRows = this->getNodeNumRows ();
// Cache hit: the stored offsets already have the expected length.
6804 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) ==
lclNumRows+1) {
6805 offsets = k_offRankOffsets_;
// Cache miss: invalidate, reallocate, and hand out the new view.
6808 haveLocalOffRankOffsets_ =
false;
6809 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing(
"offRankOffset"),
lclNumRows+1);
6810 offsets = k_offRankOffsets_;
// The offsets are computed from the local graph only once the graph is
// fill complete.
6825 if (isFillComplete ()) {
6826 auto lclGraph = this->getLocalGraphDevice ();
6827 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6830 haveLocalOffRankOffsets_ =
true;
// HelpGetLocalDiagOffsets: helper that maps a host offsets view to a view
// usable with DeviceType, and copies results back when the two differ.
// The primary template's boolean parameter defaults to "DeviceType's
// memory space is Kokkos::HostSpace"; the two specializations below cover
// the true and false cases. (The opening of the primary template's
// declaration is missing from this excerpt.)
6854 std::is_same<
typename DeviceType::memory_space,
6855 Kokkos::HostSpace>::value>
6856 struct HelpGetLocalDiagOffsets {};
// Host memory space: both view types are unmanaged HostSpace views, and
// copyBackIfNeeded leaves its parameters unnamed (unused).
6858 template<
class DeviceType>
6859 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6860 typedef DeviceType device_type;
6861 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6862 Kokkos::MemoryUnmanaged> device_offsets_type;
6863 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6864 Kokkos::MemoryUnmanaged> host_offsets_type;
6866 static device_offsets_type
6867 getDeviceOffsets (
const host_offsets_type& hostOffsets)
6875 copyBackIfNeeded (
const host_offsets_type& ,
6876 const device_offsets_type& )
// Non-host memory space: getDeviceOffsets allocates a separate view of the
// same length on device_type, and copyBackIfNeeded deep-copies it back
// into the host view.
6880 template<
class DeviceType>
6881 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6882 typedef DeviceType device_type;
6886 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6887 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6888 Kokkos::MemoryUnmanaged> host_offsets_type;
6890 static device_offsets_type
6891 getDeviceOffsets (
const host_offsets_type& hostOffsets)
6895 return device_offsets_type (
"offsets", hostOffsets.extent (0));
6899 copyBackIfNeeded (
const host_offsets_type& hostOffsets,
6900 const device_offsets_type& deviceOffsets)
6902 Kokkos::deep_copy (hostOffsets, deviceOffsets);
// NOTE(review): the name and signature lines are missing from this excerpt;
// presumably this is the array-based overload of getLocalDiagOffsets that
// forwards to the Kokkos::View overload via a helper_type -- confirm.
// Visible behaviour: requires a column Map (std::runtime_error otherwise)
// and takes a separate branch when offsets.size() differs from the local
// row count (the branch body is not visible).
6908 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6916 (! this->hasColMap (), std::runtime_error,
6917 "The graph does not yet have a column Map.");
6918 const LO
myNumRows =
static_cast<LO
> (this->getNodeNumRows ());
6919 if (
static_cast<LO
> (offsets.size ()) !=
myNumRows) {
6936 typedef typename helper_type::host_offsets_type host_offsets_type;
6946 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// CrsGraph::transferAndFillComplete (name taken from the `prefix` string
// below; the declarator line itself is not visible in this excerpt).
//
// Parameters visible here:
//   rowTransfer    - must be an Import or an Export whose template parameters
//                    match the graph's (see the error messages below).
//   domainTransfer - optional (RCP, may be null) transfer for the domain Map;
//                    must be of the same kind as rowTransfer.
//   domainMap      - domain Map for the result; null means "use
//                    this->getDomainMap()".
//   rangeMap       - range Map for the result; null means "use
//                    this->getRangeMap()".
//   params         - optional list; keys read in this part are
//                    "Reverse Mode", "Restrict Communicator" and the
//                    "CrsGraph" sublist.
// Throws std::invalid_argument for the argument checks in this part that are
// visible (the exception types of the first four messages are not visible).
6953 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6957 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
6958 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > &
domainTransfer,
6959 const Teuchos::RCP<const map_type>& domainMap,
6960 const Teuchos::RCP<const map_type>&
rangeMap,
6961 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
6967 using Teuchos::ArrayRCP;
6968 using Teuchos::ArrayView;
6969 using Teuchos::Comm;
6970 using Teuchos::ParameterList;
// Timer support is compiled in only when HAVE_TPETRA_MMM_TIMINGS is defined.
6973#ifdef HAVE_TPETRA_MMM_TIMINGS
6975 using Teuchos::TimeMonitor;
6980 using NT = node_type;
// Prefix prepended to every exception message raised by this method.
6984 const char*
prefix =
"Tpetra::CrsGraph::transferAndFillComplete: ";
6986#ifdef HAVE_TPETRA_MMM_TIMINGS
// Messages of the transfer-type checks (the conditions themselves are on
// lines not visible in this excerpt).
7003 prefix <<
"The 'rowTransfer' input argument must be either an Import or "
7004 "an Export, and its template parameters must match the corresponding "
7005 "template parameters of the CrsGraph.");
7021 prefix <<
"The 'domainTransfer' input argument must be either an "
7022 "Import or an Export, and its template parameters must match the "
7023 "corresponding template parameters of the CrsGraph.");
7029 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
7030 "must be of the same type (either Import or Export).");
7036 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
7037 "must be of the same type (either Import or Export).");
// Data only has to be sent when the transfer's source Map is distributed.
7044 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
// Option defaults; each is overridden from `params` when a list was given.
7050 bool reverseMode =
false;
7051 bool restrictComm =
false;
7052 RCP<ParameterList> graphparams;
7053 if (! params.is_null()) {
7054 reverseMode = params->get(
"Reverse Mode", reverseMode);
7055 restrictComm = params->get(
"Restrict Communicator", restrictComm);
7056 graphparams = sublist(params,
"CrsGraph");
// In reverse mode the transfer is applied backwards, so the destination row
// Map is the transfer's *source* Map; otherwise it is the target Map.
7061 RCP<const map_type> MyRowMap = reverseMode ?
7062 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7063 RCP<const map_type> MyColMap;
// Fall back to this graph's own domain / range Map when the caller passed null.
7064 RCP<const map_type> MyDomainMap = !
domainMap.is_null() ?
domainMap : getDomainMap();
7065 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
// Keep copies of the initial row / domain Maps; MyRowMap and MyDomainMap are
// reassigned further down in this method.
7066 RCP<const map_type> BaseRowMap = MyRowMap;
7067 RCP<const map_type> BaseDomainMap = MyDomainMap;
// A caller-supplied destination graph is accepted only if it is still empty
// (neither locally nor globally indexed) and its row Map matches MyRowMap.
7075 if (! destGraph.is_null()) {
7086 const bool NewFlag =
7087 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7088 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7089 prefix <<
"The input argument 'destGraph' is only allowed to be nonnull, "
7090 "if its graph is empty (neither locally nor globally indexed).");
7099 TEUCHOS_TEST_FOR_EXCEPTION(
7100 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7101 prefix <<
"The (row) Map of the input argument 'destGraph' is not the "
7102 "same as the (row) Map specified by the input argument 'rowTransfer'.");
// Reject a caller-supplied destination graph that checkSizes() reports as
// incompatible with the source graph (*this); raises std::invalid_argument.
// The message previously contained a duplicated word ("legal legal").
7104 TEUCHOS_TEST_FOR_EXCEPTION(
7105 ! destGraph->checkSizes(*
this), std::invalid_argument,
7106 prefix <<
"You provided a nonnull destination graph, but checkSizes() "
7107 "indicates that it is not a legal target for redistribution from "
7108 "the source graph (*this). This may mean that they do not have the "
7109 "same dimensions.");
// Forward mode: the transfer's source Map must be this graph's row Map.
7123 TEUCHOS_TEST_FOR_EXCEPTION(
7124 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7125 std::invalid_argument, prefix <<
7126 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
// Reverse mode: the transfer's target Map must be this graph's row Map.
7128 TEUCHOS_TEST_FOR_EXCEPTION(
7129 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7130 std::invalid_argument, prefix <<
7131 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
// When a domain transfer was given as an Import, its target Map must equal
// the requested domainMap.
// NOTE(review): domainMap is dereferenced here without a null check, although
// a null domainMap is accepted earlier in this method -- confirm that callers
// always pass a non-null domainMap together with a domainTransfer.
7134 TEUCHOS_TEST_FOR_EXCEPTION(
7135 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
7136 std::invalid_argument,
7137 prefix <<
"The target map of the 'domainTransfer' input argument must be "
7138 "the same as the rebalanced domain map 'domainMap'");
// Same check for a domain transfer given as an Export, using its source Map.
7140 TEUCHOS_TEST_FOR_EXCEPTION(
7141 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
7142 std::invalid_argument,
7143 prefix <<
"The source map of the 'domainTransfer' input argument must be "
7144 "the same as the rebalanced domain map 'domainMap'");
// Views of the transfer's index lists.  In reverse mode the export/remote and
// permute-to/permute-from lists swap roles.
7157 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7158 ArrayView<const LO> ExportLIDs = reverseMode ?
7159 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7160 ArrayView<const LO> RemoteLIDs = reverseMode ?
7161 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7162 ArrayView<const LO> PermuteToLIDs = reverseMode ?
7163 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7164 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7165 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7166 Distributor& Distor = rowTransfer.getDistributor();
// Per-entry process-rank arrays filled in later, and this process's rank.
7169 Teuchos::Array<int> SourcePids;
7170 Teuchos::Array<int> TargetPids;
7171 int MyPID = getComm()->getRank();
// Maps / communicator used when the communicator is restricted.
7174 RCP<const map_type> ReducedRowMap, ReducedColMap,
7175 ReducedDomainMap, ReducedRangeMap;
7176 RCP<const Comm<int> > ReducedComm;
// Create the destination graph if the caller did not supply one.
7180 if (destGraph.is_null()) {
7181 destGraph = rcp(
new this_type(MyRowMap, 0, StaticProfile, graphparams));
// Communicator restriction: drop processes with no rows from the row Map and
// from destGraph.  (The enclosing condition is not visible in this excerpt;
// presumably `restrictComm` -- TODO confirm.)
7188 ReducedRowMap = MyRowMap->removeEmptyProcesses();
7189 ReducedComm = ReducedRowMap.is_null() ?
7191 ReducedRowMap->getComm();
7192 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
// The pointer comparison detects the case where the domain (or range) Map is
// the very same object as the row Map; otherwise the Map is rebuilt on the
// reduced communicator.
7194 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7196 MyDomainMap->replaceCommWithSubset(ReducedComm);
7197 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7199 MyRangeMap->replaceCommWithSubset(ReducedComm);
// From here on the "My*" Maps refer to the reduced versions.
7202 MyRowMap = ReducedRowMap;
7203 MyDomainMap = ReducedDomainMap;
7204 MyRangeMap = ReducedRangeMap;
// The rank must be re-read because ranks can differ on the reduced communicator.
7207 if (! ReducedComm.is_null()) {
7208 MyPID = ReducedComm->getRank();
// Without restriction the "reduced" communicator is simply the row Map's.
7215 ReducedComm = MyRowMap->getComm();
// ---- Compute SourcePids: one process rank per entry of this graph's column
// Map (it is resized to getColMap()->getNodeNumElements() in each branch
// that fills it via a vector).  Which branch runs depends on whether this
// graph has an Importer, whether the requested domain Map equals the current
// one, and whether the communicator is being restricted.
7221#ifdef HAVE_TPETRA_MMM_TIMINGS
7223 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"ImportSetup"))));
7226 RCP<const import_type> MyImporter = getImporter();
7229 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
// Case 1: same domain Map, Importer present, no restriction -- take the
// ranks directly from the Importer.
7231 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7238 Import_Util::getPids(*MyImporter, SourcePids,
false);
// Case 2: same domain Map, Importer present, restricted communicator --
// fill a domain-Map vector with MyPID (the rank on the reduced communicator
// when one exists) and import it onto the column Map.
7240 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7243 ivector_type SourceDomain_pids(getDomainMap(),
true);
7244 ivector_type SourceCol_pids(getColMap());
7246 SourceDomain_pids.putScalar(MyPID);
7248 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7249 SourcePids.resize(getColMap()->getNodeNumElements());
7250 SourceCol_pids.get1dCopy(SourcePids());
// Case 3: same domain Map and no Importer -- every column entry is assigned
// this process's rank.
7252 else if (MyImporter.is_null() && bSameDomainMap) {
7254 SourcePids.resize(getColMap()->getNodeNumElements());
7255 SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
// Case 4: Importer and domainTransfer both present -- start from ranks on the
// *new* domain Map, move them back to the current domain Map through the
// domain transfer, then import onto the column Map.
7257 else if ( ! MyImporter.is_null() &&
7258 ! domainTransfer.is_null() ) {
7265 ivector_type TargetDomain_pids(
domainMap);
7266 TargetDomain_pids.putScalar(MyPID);
7269 ivector_type SourceDomain_pids(getDomainMap());
7272 ivector_type SourceCol_pids(getColMap());
// Pick doExport vs. doImport according to the kind of transfer object and
// the mode.
7274 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7275 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
7277 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7278 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7280 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7281 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7283 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7284 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
// Defensive: none of the four combinations matched.
7287 TEUCHOS_TEST_FOR_EXCEPTION(
7288 true, std::logic_error,
7289 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
7291 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7292 SourcePids.resize(getColMap()->getNodeNumElements());
7293 SourceCol_pids.get1dCopy(SourcePids());
// Case 5: the new domain Map equals the new row Map and the current domain
// Map equals the current row Map -- the row transfer itself is used to move
// the ranks.  (The declaration of TargetRow_pids is on a line not visible in
// this excerpt.)
7295 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7296 getDomainMap()->isSameAs(*getRowMap())) {
7299 ivector_type SourceRow_pids(getRowMap());
7300 ivector_type SourceCol_pids(getColMap());
7302 TargetRow_pids.putScalar(MyPID);
7303 if (! reverseMode && xferAsImport !=
nullptr) {
7304 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
7306 else if (reverseMode && xferAsExport !=
nullptr) {
7307 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
7309 else if (! reverseMode && xferAsExport !=
nullptr) {
7310 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
7312 else if (reverseMode && xferAsImport !=
nullptr) {
7313 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
// Defensive: none of the four combinations matched.
7316 TEUCHOS_TEST_FOR_EXCEPTION(
7317 true, std::logic_error,
7318 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
7320 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
7321 SourcePids.resize(getColMap()->getNodeNumElements());
7322 SourceCol_pids.get1dCopy(SourcePids());
// Any other combination of Maps is not supported.
7325 TEUCHOS_TEST_FOR_EXCEPTION(
7326 true, std::invalid_argument,
7327 prefix <<
"This method only allows either domainMap == getDomainMap(), "
7328 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
// ---- Pack and communicate.  A constantNumPackets of 0 means the number of
// packets differs per LID, so per-LID count arrays are needed; otherwise the
// receive buffer length is simply (#remote LIDs) * constantNumPackets.
7332 size_t constantNumPackets = destGraph->constantNumberOfPackets();
7333 if (constantNumPackets == 0) {
7334 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7342 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7343 destGraph->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
// Mark the host copy of the export counts as modified before it is written.
// (The pack call that receives the arguments below is only partly visible.)
7348 destGraph->numExportPacketsPerLID_.modify_host();
7349 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7354 numExportPacketsPerLID, ExportLIDs,
7355 SourcePids, constantNumPackets);
7359#ifdef HAVE_TPETRA_MMM_TIMINGS
7361 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Transfer"))));
// Nothing is sent unless the source Map is distributed.
7364 if (communication_needed) {
// First group: uses Distor.doReversePostsAndWaits.  The condition selecting
// between this group and the doPostsAndWaits group below is not visible in
// this excerpt (presumably reverseMode -- TODO confirm).
7366 if (constantNumPackets == 0) {
// Variable packet counts: exchange the per-LID counts first (1 value per
// LID), sum them to size the import buffer, then exchange the packets.
7370 destGraph->numExportPacketsPerLID_.sync_host();
7371 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7373 destGraph->numImportPacketsPerLID_.sync_host();
7374 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7376 Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7377 numImportPacketsPerLID);
7378 size_t totalImportPackets = 0;
7380 totalImportPackets += numImportPacketsPerLID[i];
7385 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
7386 destGraph->imports_.modify_host();
7387 Teuchos::ArrayView<packet_type> hostImports =
7391 destGraph->exports_.sync_host();
7392 Teuchos::ArrayView<const packet_type> hostExports =
7394 Distor.doReversePostsAndWaits(hostExports,
7395 numExportPacketsPerLID,
7397 numImportPacketsPerLID);
// Constant packet count: a single exchange of the packet buffers.
7400 destGraph->imports_.modify_host();
7401 Teuchos::ArrayView<packet_type> hostImports =
7405 destGraph->exports_.sync_host();
7406 Teuchos::ArrayView<const packet_type> hostExports =
7408 Distor.doReversePostsAndWaits(hostExports,
// Second group: same sequence as above, using Distor.doPostsAndWaits.
7414 if (constantNumPackets == 0) {
7418 destGraph->numExportPacketsPerLID_.sync_host();
7419 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7421 destGraph->numImportPacketsPerLID_.sync_host();
7422 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7424 Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7425 numImportPacketsPerLID);
7426 size_t totalImportPackets = 0;
7428 totalImportPackets += numImportPacketsPerLID[i];
7433 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
7434 destGraph->imports_.modify_host();
7435 Teuchos::ArrayView<packet_type> hostImports =
7439 destGraph->exports_.sync_host();
7440 Teuchos::ArrayView<const packet_type> hostExports =
7442 Distor.doPostsAndWaits(hostExports,
7443 numExportPacketsPerLID,
7445 numImportPacketsPerLID);
// Constant packet count: a single exchange of the packet buffers.
7448 destGraph->imports_.modify_host();
7449 Teuchos::ArrayView<packet_type> hostImports =
7453 destGraph->exports_.sync_host();
7454 Teuchos::ArrayView<const packet_type> hostExports =
7456 Distor.doPostsAndWaits(hostExports,
// ---- Unpack, phase 1: make the received counts and packets readable on the
// host.  (The call receiving the arguments at 7481-7483 is only partly
// visible; `mynnz` used below is defined on a line not visible here.)
7467#ifdef HAVE_TPETRA_MMM_TIMINGS
7469 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Unpack-1"))));
7473 destGraph->numImportPacketsPerLID_.sync_host();
7474 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7476 destGraph->imports_.sync_host();
7477 Teuchos::ArrayView<const packet_type> hostImports =
7481 numImportPacketsPerLID,
7482 constantNumPackets,
INSERT,
7483 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
// CSR arrays for the destination graph: N+1 row pointers, plus column
// indices as global IDs (GID) and as local IDs (LID).
7484 size_t N = BaseRowMap->getNodeNumElements();
7487 ArrayRCP<size_t> CSR_rowptr(N+1);
7488 ArrayRCP<GO> CSR_colind_GID;
7489 ArrayRCP<LO> CSR_colind_LID;
7490 CSR_colind_GID.resize(mynnz);
// When LO and GO are the same type the LID array aliases the GID storage
// (reinterpret cast); otherwise it gets storage of its own.
7494 if (
typeid(LO) ==
typeid(GO)) {
7495 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7498 CSR_colind_LID.resize(mynnz);
// Arguments of the unpack-and-combine call (its name is on a line not
// visible in this excerpt).
7507 numImportPacketsPerLID, constantNumPackets,
7508 INSERT, NumSameIDs, PermuteToLIDs,
7509 PermuteFromLIDs, N, mynnz, MyPID,
7510 CSR_rowptr(), CSR_colind_GID(),
7511 SourcePids(), TargetPids);
// ---- Unpack, phase 2: build the column Map and reindex.
7516#ifdef HAVE_TPETRA_MMM_TIMINGS
7518 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"Unpack-2"))));
7523 Teuchos::Array<int> RemotePids;
7524 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7528 TargetPids, RemotePids,
// As for the domain/range Maps: reuse the reduced row Map when the column Map
// is the same object, else rebuild the column Map on the reduced communicator.
7535 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7537 MyColMap->replaceCommWithSubset(ReducedComm);
7538 MyColMap = ReducedColMap;
7542 destGraph->replaceColMap(MyColMap);
// A null reduced communicator is handled here (branch body not visible;
// presumably this process is excluded and returns early -- TODO confirm).
7549 if (ReducedComm.is_null()) {
// Import-like transfers only need sorting; Export-like transfers may deliver
// duplicate entries, so they are sorted and merged, which can shrink the
// index array (hence the resize to CSR_rowptr[N]).
7556 if ((! reverseMode && xferAsImport !=
nullptr) ||
7557 (reverseMode && xferAsExport !=
nullptr)) {
7558 Import_Util::sortCrsEntries(CSR_rowptr(),
7561 else if ((! reverseMode && xferAsExport !=
nullptr) ||
7562 (reverseMode && xferAsImport !=
nullptr)) {
7563 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7565 if (CSR_rowptr[N] != mynnz) {
7566 CSR_colind_LID.resize(CSR_rowptr[N]);
// Defensive: neither combination matched.
7570 TEUCHOS_TEST_FOR_EXCEPTION(
7571 true, std::logic_error,
7572 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
// Hand the finished CSR arrays to the destination graph.
7580 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
// ---- Build the Importer and finish with expertStaticFillComplete.
7586 Teuchos::ParameterList esfc_params;
7587#ifdef HAVE_TPETRA_MMM_TIMINGS
7589 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"CreateImporter"))));
7591 RCP<import_type> MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids));
7592#ifdef HAVE_TPETRA_MMM_TIMINGS
7594 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+
string(
"ESFC"))));
7596 esfc_params.set(
"Timer Label",prefix + std::string(
"TAFC"));
// Forward the caller's "compute global constants" choice (default true).
7598 if(!params.is_null())
7599 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
// esfc_params is a local object, so it is wrapped in a non-owning RCP
// (second argument false).
7601 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7602 MyImport, Teuchos::null, rcp(&esfc_params,
false));
7606 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7611 const Teuchos::RCP<const map_type>& domainMap,
7612 const Teuchos::RCP<const map_type>&
rangeMap,
7613 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7618 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7624 const Teuchos::RCP<const map_type>& domainMap,
7625 const Teuchos::RCP<const map_type>&
rangeMap,
7626 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7631 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7636 const Teuchos::RCP<const map_type>& domainMap,
7637 const Teuchos::RCP<const map_type>&
rangeMap,
7638 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7643 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7649 const Teuchos::RCP<const map_type>& domainMap,
7650 const Teuchos::RCP<const map_type>&
rangeMap,
7651 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
// Member-wise exchange of state between *this and `graph`: every visible
// statement is a std::swap of one data member of `graph` with the same member
// of *this.  (The function's name and signature are not visible in this
// excerpt.)
7657 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7662 std::swap(
graph.need_sync_host_uvm_access,
this->need_sync_host_uvm_access);
// Row, column, range and domain Maps.
7664 std::swap(
graph.rowMap_,
this->rowMap_);
7665 std::swap(
graph.colMap_,
this->colMap_);
7666 std::swap(
graph.rangeMap_,
this->rangeMap_);
7667 std::swap(
graph.domainMap_,
this->domainMap_);
// Import / Export objects.
7669 std::swap(
graph.importer_,
this->importer_);
7670 std::swap(
graph.exporter_,
this->exporter_);
// Packed row pointers (device and host copies).
7672 std::swap(
graph.rowPtrsPacked_dev_,
this->rowPtrsPacked_dev_);
7673 std::swap(
graph.rowPtrsPacked_host_,
this->rowPtrsPacked_host_);
// Cached local / global counts.
7675 std::swap(
graph.nodeMaxNumRowEntries_,
this->nodeMaxNumRowEntries_);
7677 std::swap(
graph.globalNumEntries_,
this->globalNumEntries_);
7678 std::swap(
graph.globalMaxNumRowEntries_,
this->globalMaxNumRowEntries_);
7680 std::swap(
graph.numAllocForAllRows_,
this->numAllocForAllRows_);
// Unpacked row pointers and off-rank offsets.
7682 std::swap(
graph.rowPtrsUnpacked_dev_,
this->rowPtrsUnpacked_dev_);
7683 std::swap(
graph.rowPtrsUnpacked_host_,
this->rowPtrsUnpacked_host_);
7684 std::swap(
graph.k_offRankOffsets_,
this->k_offRankOffsets_);
// Column-index storage (local unpacked, global, local packed).
7686 std::swap(
graph.lclIndsUnpacked_wdv,
this->lclIndsUnpacked_wdv);
7687 std::swap(
graph.gblInds_wdv,
this->gblInds_wdv);
7688 std::swap(
graph.lclIndsPacked_wdv,
this->lclIndsPacked_wdv);
7690 std::swap(
graph.storageStatus_,
this->storageStatus_);
// State flags.
7692 std::swap(
graph.indicesAreAllocated_,
this->indicesAreAllocated_);
7693 std::swap(
graph.indicesAreLocal_,
this->indicesAreLocal_);
7694 std::swap(
graph.indicesAreGlobal_,
this->indicesAreGlobal_);
7695 std::swap(
graph.fillComplete_,
this->fillComplete_);
7696 std::swap(
graph.indicesAreSorted_,
this->indicesAreSorted_);
7697 std::swap(
graph.noRedundancies_,
this->noRedundancies_);
7698 std::swap(
graph.haveLocalConstants_,
this->haveLocalConstants_);
7699 std::swap(
graph.haveGlobalConstants_,
this->haveGlobalConstants_);
7700 std::swap(
graph.haveLocalOffRankOffsets_,
this->haveLocalOffRankOffsets_);
7702 std::swap(
graph.sortGhostsAssociatedWithEachProcessor_,
this->sortGhostsAssociatedWithEachProcessor_);
// Per-row allocation / entry counts and the nonlocal-entries container.
7704 std::swap(
graph.k_numAllocPerRow_,
this->k_numAllocPerRow_);
7705 std::swap(
graph.k_numRowEntries_,
this->k_numRowEntries_);
7706 std::swap(
graph.nonlocals_,
this->nonlocals_);
// Member-by-member comparison of *this against `graph`; the running result
// is kept in `output`, which each check can only turn from true to false.
// (The function's name and signature are not visible in this excerpt.)
7710 template<
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// v1 / v2 are copies of the mapped values (plain `auto`, no reference), so
// sorting them does not modify m1 / m2.
// NOTE(review): find(key) is dereferenced without an end() check -- this
// assumes `key` is present in both maps; verify against the lines not shown.
7724 auto v1 =
m1.find(
key)->second;
7725 auto v2 =
m2.find(
key)->second;
7726 std::sort(
v1.begin(),
v1.end());
7727 std::sort(
v2.begin(),
v2.end());
// Scalar members: `output` stays as it is when the two values match and
// becomes false otherwise.
7746 output = this->nodeMaxNumRowEntries_ ==
graph.nodeMaxNumRowEntries_ ?
output :
false;
7749 output = this->globalMaxNumRowEntries_ ==
graph.globalMaxNumRowEntries_ ?
output :
false;
7751 output = this->numAllocForAllRows_ ==
graph.numAllocForAllRows_ ?
output :
false;
7755 output = this->indicesAreAllocated_ ==
graph.indicesAreAllocated_ ?
output :
false;
7761 output = this->haveLocalConstants_ ==
graph.haveLocalConstants_ ?
output :
false;
7762 output = this->haveGlobalConstants_ ==
graph.haveGlobalConstants_ ?
output :
false;
7763 output = this->haveLocalOffRankOffsets_ ==
graph.haveLocalOffRankOffsets_ ?
output :
false;
// Compare this graph's flag with the *other* graph's flag.  The previous code
// compared the member with itself, which is always true, so a difference in
// sortGhostsAssociatedWithEachProcessor_ was never detected.
7764 output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ?
output :
false;
// Array members: lengths (extent(0)) are compared first; the contents are
// examined only when the lengths match, the array is non-empty and `output`
// is still true.  The element-wise loops stop as soon as `output` becomes
// false (`output &&` in the loop condition).  Loop bodies are not visible in
// this excerpt.
7772 output = this->k_numAllocPerRow_.extent(0) ==
graph.k_numAllocPerRow_.extent(0) ?
output :
false;
7773 if(
output && this->k_numAllocPerRow_.extent(0) > 0)
7775 for(
size_t i=0;
output &&
i<this->k_numAllocPerRow_.extent(0);
i++)
// Number of entries per row.
7781 output = this->k_numRowEntries_.extent(0) ==
graph.k_numRowEntries_.extent(0) ?
output :
false;
7782 if(
output && this->k_numRowEntries_.extent(0) > 0)
7784 for(
size_t i = 0;
output &&
i < this->k_numRowEntries_.extent(0);
i++)
// Unpacked row pointers (host copy).
7789 output = this->rowPtrsUnpacked_host_.extent(0) ==
graph.rowPtrsUnpacked_host_.extent(0) ?
output :
false;
7790 if(
output && this->rowPtrsUnpacked_host_.extent(0) > 0)
// Local column indices, read through read-only host views.
7799 output = this->lclIndsUnpacked_wdv.extent(0) ==
graph.lclIndsUnpacked_wdv.extent(0) ?
output :
false;
7800 if(
output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7802 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7803 auto indGraph =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
// Global column indices, read through read-only host views.
7809 output = this->gblInds_wdv.extent(0) ==
graph.gblInds_wdv.extent(0) ?
output :
false;
7810 if(
output && this->gblInds_wdv.extent(0) > 0)
7812 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7813 auto indtGraph =
graph.gblInds_wdv.getHostView(Access::ReadOnly);
// Instantiation helper for importAndFillCompleteCrsGraph with a single
// Import (`importer`), for the given (LO, GO, NODE).  The Import and Map
// template arguments are spelled through CrsGraph<LO,GO,NODE>'s
// local_ordinal_type / global_ordinal_type / node_type.
7886#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7888 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7889 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7890 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7891 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7892 CrsGraph<LO,GO,NODE>::node_type>& importer, \
7893 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7894 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7895 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7896 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7897 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7898 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7899 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Same, for the overload taking two Imports (`rowImporter` and
// `domainImporter`).
7901#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7903 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7904 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7905 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7906 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7907 CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7908 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7909 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7910 CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7911 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7912 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7913 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7914 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7915 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7916 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7917 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation helper for exportAndFillCompleteCrsGraph with a single
// Export (`exporter`).
7920#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7922 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7923 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7924 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7925 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7926 CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7927 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7928 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7929 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7930 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7931 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7932 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7933 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Same, for the overload taking two Exports (`rowExporter` and
// `domainExporter`).
7935#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7937 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7938 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7939 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7940 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7941 CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7942 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7943 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7944 CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7945 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7946 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7947 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7948 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7949 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7950 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7951 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Top-level macro: explicitly instantiates CrsGraph<LO, GO, NODE> and then
// expands the four helper macros above for the same (LO, GO, NODE).
7954#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7955 template class CrsGraph<LO, GO, NODE>; \
7956 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7957 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7958 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7959 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)