You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
135 lines
2.4 KiB
135 lines
2.4 KiB
#ifndef COMMON_H
|
|
#define COMMON_H
|
|
|
|
#include "co_types.h"
|
|
#include <cmath>
|
|
#include <algorithm>
|
|
|
|
#if defined(_OPENMP)
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
|
|
#define DISABLE_COPY_AND_ASSIGN(classname) \
|
|
private:\
|
|
classname(const classname&) = delete;\
|
|
classname& operator=(const classname&) = delete;
|
|
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
void fill(T* arr, int N, T val) {
|
|
for(int idx = 0; idx < N; ++idx) {
|
|
arr[idx] = val;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
void fill_zero(T* arr, int N) {
|
|
for(int idx = 0; idx < N; ++idx) {
|
|
arr[idx] = 0;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
inline T distance_euclidean(const T* q, const T* t, int N) {
|
|
T out = 0;
|
|
for(int idx = 0; idx < N; idx++) {
|
|
T diff = q[idx] - t[idx];
|
|
out += diff * diff;
|
|
}
|
|
return out;
|
|
}
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
inline T distance_l2(const T* q, const T* t, int N) {
|
|
T out = distance_euclidean(q, t, N);
|
|
out = std::sqrt(out);
|
|
return out;
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
struct FillFunctor {
|
|
T* arr;
|
|
const T val;
|
|
|
|
FillFunctor(T* arr, const T val) : arr(arr), val(val) {}
|
|
CPU_GPU_FUNCTION void operator()(const int idx) {
|
|
arr[idx] = val;
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
T mmin(const T& a, const T& b) {
|
|
#ifdef __CUDA_ARCH__
|
|
return min(a, b);
|
|
#else
|
|
return std::min(a, b);
|
|
#endif
|
|
}
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
T mmax(const T& a, const T& b) {
|
|
#ifdef __CUDA_ARCH__
|
|
return max(a, b);
|
|
#else
|
|
return std::max(a, b);
|
|
#endif
|
|
}
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
T mround(const T& a) {
|
|
#ifdef __CUDA_ARCH__
|
|
return round(a);
|
|
#else
|
|
return round(a);
|
|
#endif
|
|
}
|
|
|
|
|
|
#ifdef __CUDA_ARCH__
|
|
#if __CUDA_ARCH__ < 600
|
|
__device__ double atomicAdd(double* address, double val)
|
|
{
|
|
unsigned long long int* address_as_ull =
|
|
(unsigned long long int*)address;
|
|
unsigned long long int old = *address_as_ull, assumed;
|
|
|
|
do {
|
|
assumed = old;
|
|
old = atomicCAS(address_as_ull, assumed,
|
|
__double_as_longlong(val +
|
|
__longlong_as_double(assumed)));
|
|
|
|
// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
|
|
} while (assumed != old);
|
|
|
|
return __longlong_as_double(old);
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
template <typename T>
|
|
CPU_GPU_FUNCTION
|
|
void matomic_add(T* addr, T val) {
|
|
#ifdef __CUDA_ARCH__
|
|
atomicAdd(addr, val);
|
|
#else
|
|
#if defined(_OPENMP)
|
|
#pragma omp atomic
|
|
#endif
|
|
*addr += val;
|
|
#endif
|
|
}
|
|
|
|
#endif
|
|
|