Simplify OMP min/max reductions, cleanup raw indexing into CSysVector by pcarruscag · Pull Request #2770 · su2code/SU2 · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Common/include/containers/C2DContainer.hpp
6 changes: 3 additions & 3 deletions Common/include/containers/CPyWrapperMatrixView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
*/
class CPyWrapperMatrixView {
protected:
static_assert(su2activematrix::IsRowMajor, "");
static_assert(su2activematrix::IsRowMajor);
su2double* data_ = nullptr;
unsigned long rows_ = 0, cols_ = 0;
std::string name_;
Expand Down Expand Up @@ -124,7 +124,7 @@ class CPyWrapperMatrixView {
*/
class CPyWrapperMarkerMatrixView {
private:
static_assert(su2activematrix::IsRowMajor, "");
static_assert(su2activematrix::IsRowMajor);
su2double* data_ = nullptr;
const CVertex* const* vertices_ = nullptr;
unsigned long rows_ = 0, cols_ = 0;
Expand Down Expand Up @@ -175,7 +175,7 @@ class CPyWrapperMarkerMatrixView {
*/
class CPyWrapper3DMatrixView {
protected:
static_assert(su2activematrix::IsRowMajor, "");
static_assert(su2activematrix::IsRowMajor);
su2double* data_ = nullptr;
unsigned long rows_ = 0, cols_ = 0, dims_ = 0;
std::string name_;
Expand Down
7 changes: 4 additions & 3 deletions Common/include/linear_algebra/CSysMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,10 +816,11 @@ class CSysMatrix {
}

/*!
* \brief Deletes the values of the row i of the sparse matrix.
* \param[in] i - Index of the row.
* \brief Deletes the values of a row of the sparse matrix.
* \param[in] block_i - Index of the block.
* \param[in] row - Row within the block.
*/
void DeleteValsRowi(unsigned long i);
void DeleteValsRowi(unsigned long block_i, unsigned long row);

/*!
* \brief Modifies this matrix (A) and a rhs vector (b) such that (A^-1 * b)_i = x_i.
Expand Down
3 changes: 1 addition & 2 deletions Common/include/linear_algebra/CSysVector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,10 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
/*--- Reduce over all threads in an ordered way to ensure a deterministic result. ---*/
for (int i = 1; i < omp_get_num_threads(); ++i) sum += dot_scratch[i];
#ifdef HAVE_MPI

/*--- Reduce across all mpi ranks, only the master thread communicates. ---*/
const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dot_scratch[0], 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
#endif
}
/*--- Make view of result consistent across threads. ---*/
END_SU2_OMP_SAFE_GLOBAL_ACCESS
Expand Down
34 changes: 25 additions & 9 deletions Common/include/parallelization/mpi_structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,20 @@
#include "mpi_structure.hpp"
#include <cstring> // memcpy

/* Initialise the MPI Communicator Rank and Size */
/* Initialise the MPI Communicator Rank, Size, and default MPI Communicator */
#ifdef HAVE_MPI
int CBaseMPIWrapper::Rank = 0;
int CBaseMPIWrapper::Size = 1;

/* Set the default MPI Communicator */
#ifdef HAVE_MPI
CBaseMPIWrapper::Comm CBaseMPIWrapper::currentComm = MPI_COMM_WORLD;
#else
CBaseMPIWrapper::Comm CBaseMPIWrapper::currentComm = 0; // dummy value
template <typename ScalarType>
int CBaseMPIWrapper<ScalarType>::Rank = 0;

template <typename ScalarType>
int CBaseMPIWrapper<ScalarType>::Size = 1;

template <typename ScalarType>
typename CBaseMPIWrapper<ScalarType>::Comm CBaseMPIWrapper<ScalarType>::currentComm = 0; // dummy value
#endif

#ifdef HAVE_MPI
Expand Down Expand Up @@ -122,7 +127,8 @@ void CBaseMPIWrapper::CopyData(const void* sendbuf, void* recvbuf, int size, Dat
}
#else // HAVE_MPI

void CBaseMPIWrapper::Error(std::string ErrorMsg, std::string FunctionName) {
template <typename ScalarType>
void CBaseMPIWrapper<ScalarType>::Error(const std::string& ErrorMsg, const std::string& FunctionName) {
if (Rank == 0) {
std::cout << std::endl << std::endl;
std::cout << "Error in \"" << FunctionName << "\": " << std::endl;
Expand All @@ -134,12 +140,13 @@ void CBaseMPIWrapper::Error(std::string ErrorMsg, std::string FunctionName) {
Abort(currentComm, 0);
}

void CBaseMPIWrapper::CopyData(const void* sendbuf, void* recvbuf, int size, Datatype datatype, int recvshift,
int sendshift) {
template <typename ScalarType>
void CBaseMPIWrapper<ScalarType>::CopyData(const void* sendbuf, void* recvbuf, int size, Datatype datatype,
int recvshift, int sendshift) {
switch (datatype) {
case MPI_DOUBLE:
for (int i = 0; i < size; i++) {
static_cast<su2double*>(recvbuf)[i + recvshift] = static_cast<const su2double*>(sendbuf)[i + sendshift];
static_cast<ScalarType*>(recvbuf)[i + recvshift] = static_cast<const ScalarType*>(sendbuf)[i + sendshift];
}
break;
case MPI_UNSIGNED_LONG:
Expand Down Expand Up @@ -178,8 +185,17 @@ void CBaseMPIWrapper::CopyData(const void* sendbuf, void* recvbuf, int size, Dat
break;
};
}

/*--- Explicit instantiations of the templated wrapper used when HAVE_MPI is not defined.
 *    The static members and member functions of CBaseMPIWrapper<ScalarType> are defined in
 *    this source file, presumably these instantiations are what makes them available to
 *    other translation units (to be confirmed against the build setup). ---*/
template class CBaseMPIWrapper<su2double>;
/*--- Instantiation for passivedouble, only compiled when CODI_REVERSE_TYPE is defined. ---*/
#if defined CODI_REVERSE_TYPE
template class CBaseMPIWrapper<passivedouble>;
#endif
/*--- Instantiation for su2mixedfloat, only compiled when USE_MIXED_PRECISION is defined. ---*/
#if defined USE_MIXED_PRECISION
template class CBaseMPIWrapper<su2mixedfloat>;
#endif

#endif // HAVE_MPI

#ifdef HAVE_MPI
#if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE
MediTypes* mediTypes;
Expand Down
25 changes: 18 additions & 7 deletions Common/include/parallelization/mpi_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ class CMediMPIWrapper : public CBaseMPIWrapper {
* \class CBaseMPIWrapper
* \brief Version for when there is no MPI.
*/
template <typename ScalarType>
class CBaseMPIWrapper {
public:
typedef int Comm;
Expand All @@ -510,7 +511,7 @@ class CBaseMPIWrapper {
static void CopyData(const void* sendbuf, void* recvbuf, int size, Datatype datatype, int recvshift = 0,
int sendshift = 0);

static void Error(std::string ErrorMsg, std::string FunctionName);
static void Error(const std::string& ErrorMsg, const std::string& FunctionName);

static inline int GetRank() { return Rank; }

Expand Down Expand Up @@ -607,27 +608,37 @@ class CBaseMPIWrapper {

static inline passivedouble Wtime(void) { return omp_get_wtime(); }
};
typedef int SU2_Comm;
typedef CBaseMPIWrapper SU2_MPI;
using SU2_Comm = int;
using SU2_MPI = CBaseMPIWrapper<su2double>;

#endif

/*--- Select the appropriate MPI wrapper based on datatype, to use in templated classes. ---*/
template <class T>
struct SelectMPIWrapper {
typedef SU2_MPI W;
using W = SU2_MPI;
};

/*--- In AD we specialize for the passive wrapper. ---*/
#if defined CODI_REVERSE_TYPE
template <>
struct SelectMPIWrapper<passivedouble> {
typedef CBaseMPIWrapper W;
#if defined HAVE_MPI
using W = CBaseMPIWrapper;
#else
using W = CBaseMPIWrapper<passivedouble>;
#endif
};
#endif

/*--- Specialize for the low precision type. ---*/
#if defined USE_MIXED_PRECISION
template <>
struct SelectMPIWrapper<su2mixedfloat> {
typedef CBaseMPIWrapper W;
};
#if defined HAVE_MPI
using W = CBaseMPIWrapper;
#else
using W = CBaseMPIWrapper<su2mixedfloat>;
#endif
};
#endif
53 changes: 53 additions & 0 deletions Common/include/parallelization/omp_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#pragma once

#include <cstddef>
#include <algorithm>

#include "../code_config.hpp"

Expand Down Expand Up @@ -274,3 +275,55 @@ inline void atomicAdd(T rhs, T& lhs) {
SU2_OMP_ATOMIC
lhs += rhs;
}

/*--- GCC supported atomic compare (for min/max) before it was fully 5.1 compliant. ---*/
/*--- ATOMIC_COMPARE_SINCE is the smallest value of _OPENMP for which the "omp atomic compare"
 *    overloads of atomicMax/atomicMin below are compiled. The default, 202011, is the _OPENMP
 *    value of OpenMP 5.1. When __GNUC__ is greater than 11 the threshold is lowered to 201511
 *    (the _OPENMP value of OpenMP 4.5).
 *    NOTE(review): this macro is not #undef'd in the part of the file visible here, so it
 *    may remain defined for code that includes this header - confirm whether that is intended. ---*/
#define ATOMIC_COMPARE_SINCE 202011
#ifdef __GNUC__
#if __GNUC__ > 11
#undef ATOMIC_COMPARE_SINCE
#define ATOMIC_COMPARE_SINCE 201511
#endif
#endif

/*--- By default the min/max fallback to critical is required for all types. ---*/
/*--- ATOMIC_COMPARE_FALLBACK is pasted at the end of the template parameter list of the
 *    fallback overloads. Empty means the fallback is unconstrained (used for every T). ---*/
#define ATOMIC_COMPARE_FALLBACK

/*--- Atomic max, shared = max(shared, local). ---*/
#ifdef _OPENMP
#if _OPENMP >= ATOMIC_COMPARE_SINCE
/*--- Atomic min/max are supported for arithmetic types. ---*/
/*!
 * \brief Replace "shared" by "local" if "shared" is smaller, using "omp atomic compare".
 * \note Only compiled when _OPENMP >= ATOMIC_COMPARE_SINCE and only enabled for types
 *       for which std::is_arithmetic is true.
 * \param[in] local - Candidate value.
 * \param[in,out] shared - Variable updated with the larger of the two values.
 */
template <class T, su2enable_if<std::is_arithmetic<T>::value> = 0>
inline void atomicMax(const T& local, T& shared) {
#pragma omp atomic compare
shared = shared < local ? local : shared;
}

/*--- Redefine the fallback for non arithmetic types. ---*/
/*--- From here on the fallback overloads carry the opposite constraint of the overload
 *    above, so exactly one of the two is enabled for any given T. This redefinition is
 *    also the one in effect for the atomicMin fallback further down. ---*/
#undef ATOMIC_COMPARE_FALLBACK
#define ATOMIC_COMPARE_FALLBACK , su2enable_if<!std::is_arithmetic<T>::value> = 0
#endif
#endif
/*!
 * \brief Fallback version, assigns std::max(local, shared) to "shared" between the
 *        SU2_OMP_CRITICAL / END_SU2_OMP_CRITICAL macros (presumably an OpenMP critical
 *        section, the macros are defined outside of this section).
 * \note Used for all types when the atomic compare overload is not compiled, otherwise
 *       only for non arithmetic types (see ATOMIC_COMPARE_FALLBACK).
 * \param[in] local - Candidate value.
 * \param[in,out] shared - Variable updated with the larger of the two values.
 */
template <class T ATOMIC_COMPARE_FALLBACK>
inline void atomicMax(const T& local, T& shared) {
SU2_OMP_CRITICAL
shared = std::max(local, shared);
END_SU2_OMP_CRITICAL
}

/*--- Atomic min, shared = min(shared, local). ---*/
#ifdef _OPENMP
#if _OPENMP >= ATOMIC_COMPARE_SINCE
/*!
 * \brief Replace "shared" by "local" if "shared" is larger, using "omp atomic compare".
 * \note Only compiled when _OPENMP >= ATOMIC_COMPARE_SINCE and only enabled for types
 *       for which std::is_arithmetic is true.
 * \param[in] local - Candidate value.
 * \param[in,out] shared - Variable updated with the smaller of the two values.
 */
template <class T, su2enable_if<std::is_arithmetic<T>::value> = 0>
inline void atomicMin(const T& local, T& shared) {
#pragma omp atomic compare
shared = shared > local ? local : shared;
}
#endif
#endif
/*!
 * \brief Fallback version, assigns std::min(local, shared) to "shared" between the
 *        SU2_OMP_CRITICAL / END_SU2_OMP_CRITICAL macros (presumably an OpenMP critical
 *        section, the macros are defined outside of this section).
 * \note The constraint comes from ATOMIC_COMPARE_FALLBACK, which is either empty (all
 *       types) or restricts this overload to non arithmetic types when the atomic
 *       compare overload above is compiled.
 * \param[in] local - Candidate value.
 * \param[in,out] shared - Variable updated with the smaller of the two values.
 */
template <class T ATOMIC_COMPARE_FALLBACK>
inline void atomicMin(const T& local, T& shared) {
SU2_OMP_CRITICAL
shared = std::min(local, shared);
END_SU2_OMP_CRITICAL
}
/*--- The helper macro is only needed for the two fallback declarations above. ---*/
#undef ATOMIC_COMPARE_FALLBACK
10 changes: 5 additions & 5 deletions Common/include/toolboxes/graph_toolbox.hpp
Loading
Loading