#Redundant function overloads

29 messages · Page 1 of 1 (latest)

junior gust
#
#include <immintrin.h>
// Thin wrapper around an AVX `__m256d` (four packed doubles). It converts
// implicitly from a raw vector, an integer vector (bit reinterpretation) or a
// scalar double, and converts back to the raw `__m256d` for use with intrinsics.
struct F64_avx2 {
    __m256d val;  // the wrapped vector; not initialized by the defaulted ctor

    F64_avx2() = default;

    // Wrap an existing vector of doubles.
    constexpr inline F64_avx2(__m256d value) :
        val(value) {}

    // Hand the raw vector back so the wrapper can be passed straight to intrinsics.
    constexpr inline operator __m256d() const {
        return val;
    }

    // Reinterpret the 256 bits of an integer vector as packed doubles (no value
    // conversion). The intrinsic has to be _mm256_castsi256_pd (si256 -> pd);
    // _mm256_castpd_si256 goes the opposite way and fails to compile here with
    // "cannot convert '__m256i' to '__m256d'".
    F64_avx2(__m256i x) :
        val(_mm256_castsi256_pd(x)) {}

    // Broadcast one scalar into every lane.
    F64_avx2(double x) :
        val(_mm256_set1_pd(x)) {}
};

// Lane-wise fused multiply-add on three wrapped vectors: x * y + z.
static inline F64_avx2 fma(F64_avx2 x, F64_avx2 y, F64_avx2 z) {
    const __m256d fused = _mm256_fmadd_pd(x.val, y.val, z.val);
    return F64_avx2(fused);
}

// x * y + z with a scalar x: x is first converted to an F64_avx2.
static inline F64_avx2 fma(double x, F64_avx2 y, F64_avx2 z) {
    const F64_avx2 x_vec(x);
    return F64_avx2(_mm256_fmadd_pd(x_vec.val, y.val, z.val));
}
// x * y + z with a scalar y: y is first converted to an F64_avx2.
static inline F64_avx2 fma(F64_avx2 x, double y, F64_avx2 z) {
    const F64_avx2 y_vec(y);
    return F64_avx2(_mm256_fmadd_pd(x.val, y_vec.val, z.val));
}
// x * y + z with scalar x and y: both are converted to F64_avx2 before the
// vector fused multiply-add.
static inline F64_avx2 fma(double x, double y, F64_avx2 z) {
    const F64_avx2 x_vec(x);
    const F64_avx2 y_vec(y);
    return F64_avx2(_mm256_fmadd_pd(x_vec.val, y_vec.val, z.val));
}
// x * y + z with a scalar addend: z is first converted to an F64_avx2.
static inline F64_avx2 fma(F64_avx2 x, F64_avx2 y, double z) {
    const F64_avx2 z_vec(z);
    return F64_avx2(_mm256_fmadd_pd(x.val, y.val, z_vec.val));
}
// x * y + z with scalar x and z: both are converted to F64_avx2 before the
// vector fused multiply-add.
static inline F64_avx2 fma(double x, F64_avx2 y, double z) {
    const F64_avx2 x_vec(x);
    const F64_avx2 z_vec(z);
    return F64_avx2(_mm256_fmadd_pd(x_vec.val, y.val, z_vec.val));
}
// x * y + z with scalar y and z: both are converted to F64_avx2 before the
// vector fused multiply-add.
static inline F64_avx2 fma(F64_avx2 x, double y, double z) {
    const F64_avx2 y_vec(y);
    const F64_avx2 z_vec(z);
    return F64_avx2(_mm256_fmadd_pd(x.val, y_vec.val, z_vec.val));
}
How can I better write `fma` for the wrapper struct I have for AVX2?
torn islandBOT
#

When your question is answered use !solved to mark the question as resolved.

Remember to ask specific questions, provide necessary details, and reduce your question to its simplest form. For tips on how to ask a good question use !howto ask.

junior gust
#

Right now, it seems like __m256d and F64_avx2 will implicitly convert to each other, but I am not really sure whether that is possible for double in this case

tidal plaza
#

I would think of a templated function

#

Btw, to me it's complaining about:

fma.cpp:13:41: error: cannot convert ‘__m256i’ to ‘__m256d’
   13 |         this->val = _mm256_castpd_si256(x);
      |                                         ^
      |                                         |
      |                                         __m256i
#

So I assume you wanted to write __m256d as the type there

#

But then that constructor wouldn't make any sense, because you already have a constexpr one taking `__m256d`.

#

In general I'm very confused about your lack of const

#

@junior gust This would be my version:

#include <immintrin.h>
#include <concepts>

// Wrapper over a `__m256d` vector with implicit conversions from a raw vector
// or a scalar double, and back to the raw vector.
struct F64_avx2 {
    __m256d val;

    F64_avx2() = default;

    // Adopt an already-built vector.
    constexpr inline F64_avx2(const __m256d& value) : val(value) {}

    // Broadcast one double into every lane.
    F64_avx2(const double x) : val(_mm256_set1_pd(x)) {}

    // Expose the underlying vector so the wrapper can be passed to intrinsics.
    constexpr inline operator __m256d() const { return val; }
};

// Satisfied when T is exactly one of the listed candidate types. Mostly here
// for fun: the fma_type concept could simply spell out
// std::same_as<T, F64_avx2> || std::same_as<T, double> instead.
template <class T, class... Candidates>
concept is_any_of = (... || std::same_as<T, Candidates>);

// Operand types the generic fma accepts: the AVX2 wrapper or a plain double.
template <class Operand>
concept fma_type = is_any_of<Operand, F64_avx2, double>;


// Generic fused multiply-add. Each operand may be an F64_avx2 or a double;
// all three are converted to F64_avx2 and then x * y + z is computed.
template <fma_type X, fma_type Y, fma_type Z>
static inline F64_avx2 fma(const X& x, const Y& y, const Z& z) {
    const F64_avx2 a(x);
    const F64_avx2 b(y);
    const F64_avx2 c(z);
    return F64_avx2(_mm256_fmadd_pd(a.val, b.val, c.val));
}

// Full specialization for three F64_avx2 operands: nothing needs converting,
// so the wrapped vectors go straight to the intrinsic.
template <>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
    return F64_avx2(_mm256_fmadd_pd(x.val, y.val, z.val));
}
#

You could think about making the general version just const instead of const-ref, or you can just add a specialization for when there's 3 doubles that it only uses consts, or you can add a specialization that it should use const-refs if there's 1 or fewer doubles... Your imagination (and the fact it comes out to a maximum of "only" 8 different functions) is the limit

junior gust
#

That is a good bit nicer

tidal plaza
#

happy to hear that

junior gust
#

C++20 concepts make this stuff a lot less scary lol

tidal plaza
#

Btw, the specialization doesn't have the static keyword, because template specializations prohibit them.
If I were to put a static on there, this would be the error you get:

fma.cpp:32:1: error: explicit template specialization cannot have a storage class
   32 | static inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
      | ^~~~~~
junior gust
#

gotcha, right now I am just writing headers and things, I haven't compiled/tested things yet

tidal plaza
nocturne hazel
nocturne hazel
# junior gust gotcha, right now I am just writing headers and things, I haven't compiled/teste...

Well, for starters, you can declare the template function,
and provide template specializations only for the types you care about, meaning:

#include <immintrin.h>
#include <type_traits>

// Thin wrapper around an AVX `__m256d` (four packed doubles). It converts
// implicitly from a raw vector, an integer vector (bit reinterpretation) or a
// scalar double, and converts back to the raw `__m256d` for use with intrinsics.
struct F64_avx2 {
    __m256d val;  // the wrapped vector; not initialized by the defaulted ctor

    F64_avx2() = default;

    // Wrap an existing vector of doubles.
    constexpr inline F64_avx2(__m256d value) :
        val(value) {}

    // Hand the raw vector back so the wrapper can be passed straight to intrinsics.
    constexpr inline operator __m256d() const {
        return val;
    }

    // Reinterpret the 256 bits of an integer vector as packed doubles (no value
    // conversion). Previously the body was commented out, which ignored `x`
    // and left `val` uninitialized; the call that belongs here is
    // _mm256_castsi256_pd (si256 -> pd), not _mm256_castpd_si256.
    F64_avx2(__m256i x) :
        val(_mm256_castsi256_pd(x)) {}

    // Broadcast one scalar into every lane.
    F64_avx2(double x) :
        val(_mm256_set1_pd(x)) {}
};

// SFINAE helper: names T itself when T is double or F64_avx2, and is
// ill-formed for any other type.
template<typename T>
using ValidT = std::enable_if_t<std::is_same_v<T, double> || std::is_same_v<T, F64_avx2>, T>;

// Generic fused multiply-add: every operand is converted to F64_avx2 and then
// x * y + z is computed. The template parameters are unconstrained, so any
// type an F64_avx2 can be constructed from is accepted.
template<typename T1, typename T2, typename T3>
inline F64_avx2 fma(T1 x, T2 y, T3 z)
{
    const F64_avx2 a(x);
    const F64_avx2 b(y);
    const F64_avx2 c(z);
    return F64_avx2(_mm256_fmadd_pd(a.val, b.val, c.val));
}

// Full specialization for three F64_avx2 operands: no conversion is needed,
// so the wrapped vectors are passed directly to the intrinsic.
template<>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(F64_avx2 x, F64_avx2 y, F64_avx2 z)
{
    const __m256d fused = _mm256_fmadd_pd(x.val, y.val, z.val);
    return F64_avx2(fused);
}

// Entry point with no statements; nothing above is called from here.
int main()
{
    return 0;
}
#

this template is very promiscuous and allows any one to insert any type they wish (add horny meme here), but it's a start

#

you can manually list the full specializations you want to allow to be explicit

tidal plaza
# nocturne hazel this template is very promiscuous and allows any one to insert any type they wis...

But did you even read what was already posted?

If you had then you might've noticed the code I've sent:

#include <immintrin.h>
#include <concepts>

// Wrapper over a `__m256d` vector with implicit conversions from a raw vector
// or a scalar double, and back to the raw vector.
struct F64_avx2 {
    __m256d val;

    F64_avx2() = default;

    // Adopt an already-built vector.
    constexpr inline F64_avx2(const __m256d& value) : val(value) {}

    // Broadcast one double into every lane.
    F64_avx2(const double x) : val(_mm256_set1_pd(x)) {}

    // Expose the underlying vector so the wrapper can be passed to intrinsics.
    constexpr inline operator __m256d() const { return val; }
};

// Satisfied when T is exactly one of the listed candidate types. Mostly here
// for fun: the fma_type concept could simply spell out
// std::same_as<T, F64_avx2> || std::same_as<T, double> instead.
template <class T, class... Candidates>
concept is_any_of = (... || std::same_as<T, Candidates>);

// Operand types the generic fma accepts: the AVX2 wrapper or a plain double.
template <class Operand>
concept fma_type = is_any_of<Operand, F64_avx2, double>;


// Generic fused multiply-add. Each operand may be an F64_avx2 or a double;
// all three are converted to F64_avx2 and then x * y + z is computed.
template <fma_type X, fma_type Y, fma_type Z>
static inline F64_avx2 fma(const X& x, const Y& y, const Z& z) {
    const F64_avx2 a(x);
    const F64_avx2 b(y);
    const F64_avx2 c(z);
    return F64_avx2(_mm256_fmadd_pd(a.val, b.val, c.val));
}

// Full specialization for three F64_avx2 operands: nothing needs converting,
// so the wrapped vectors go straight to the intrinsic.
template <>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
    return F64_avx2(_mm256_fmadd_pd(x.val, y.val, z.val));
}
That also fixed a few other issues with the code, e.g. the constructor accepting `__m256i`, which doesn't work as written and makes no sense in the first place.
#

And my solution also fixes the "allows any one to insert any type they wish" problem

junior gust
tidal plaza
#

> I haven't compiled/tested things yet

Oh, yeah, just remembered that

#

Dangerous. You should always try to build asap