#include <immintrin.h>
/// Thin wrapper around one `__m256d` vector of packed doubles.
///
/// The wrapper converts implicitly to and from `__m256d`, so it can be handed
/// directly to `_mm256_*_pd` intrinsics and constructed from their results.
struct F64_avx2 {
    /// The wrapped vector. The defaulted constructor does not set it.
    __m256d val;

    F64_avx2() = default;

    /// Wraps an existing packed-double vector.
    constexpr F64_avx2(__m256d value) : val(value) {}

    /// Unwraps to the raw vector.
    constexpr operator __m256d() const { return val; }

    /// Reinterprets the bits of an integer vector as packed doubles.
    ///
    /// `_mm256_castsi256_pd` is the integer -> double cast. The previously
    /// used `_mm256_castpd_si256` goes the other way (`__m256d` -> `__m256i`)
    /// and therefore did not compile here.
    F64_avx2(__m256i x) : val(_mm256_castsi256_pd(x)) {}

    /// Broadcasts `x` into every lane.
    F64_avx2(double x) : val(_mm256_set1_pd(x)) {}
};
/// Fused multiply-add on packed doubles: computes `x * y + z` per lane via
/// `_mm256_fmadd_pd`. This is the overload that does the actual work.
static inline F64_avx2 fma(F64_avx2 x, F64_avx2 y, F64_avx2 z) {
    const __m256d result = _mm256_fmadd_pd(x, y, z);
    return result;
}

// Mixed scalar/vector overloads.
// Every `double` argument is broadcast into an F64_avx2 first, then the call
// is forwarded to the all-vector overload. The broadcasts are spelled out
// explicitly so that overload resolution picks the all-vector overload
// instead of re-entering the mixed overload itself.

/// `x` is a scalar; `y` and `z` are vectors.
static inline F64_avx2 fma(double x, F64_avx2 y, F64_avx2 z) {
    const F64_avx2 x_lanes(x);
    return fma(x_lanes, y, z);
}

/// `y` is a scalar; `x` and `z` are vectors.
static inline F64_avx2 fma(F64_avx2 x, double y, F64_avx2 z) {
    const F64_avx2 y_lanes(y);
    return fma(x, y_lanes, z);
}

/// `x` and `y` are scalars; `z` is a vector.
static inline F64_avx2 fma(double x, double y, F64_avx2 z) {
    const F64_avx2 x_lanes(x);
    const F64_avx2 y_lanes(y);
    return fma(x_lanes, y_lanes, z);
}

/// `z` is a scalar; `x` and `y` are vectors.
static inline F64_avx2 fma(F64_avx2 x, F64_avx2 y, double z) {
    const F64_avx2 z_lanes(z);
    return fma(x, y, z_lanes);
}

/// `x` and `z` are scalars; `y` is a vector.
static inline F64_avx2 fma(double x, F64_avx2 y, double z) {
    const F64_avx2 x_lanes(x);
    const F64_avx2 z_lanes(z);
    return fma(x_lanes, y, z_lanes);
}

/// `y` and `z` are scalars; `x` is a vector.
static inline F64_avx2 fma(F64_avx2 x, double y, double z) {
    const F64_avx2 y_lanes(y);
    const F64_avx2 z_lanes(z);
    return fma(x, y_lanes, z_lanes);
}
```How can I better write `fma` for the wrapper struct I have for avx2?
#Redundant function overloads
29 messages · Page 1 of 1 (latest)
When your question is answered use !solved to mark the question as resolved.
Remember to ask specific questions, provide necessary details, and reduce your question to its simplest form. For tips on how to ask a good question use !howto ask.
Right now, it seems like __m256d and F64_avx2 will implicitly cast to each other, but I am not really sure if that is possible for double in this case
You don't want to accept a version:
/// All-scalar variant: broadcasts each `double` into its own vector and
/// feeds the three vectors to `_mm256_fmadd_pd`.
// NOTE(review): apart from the return type this has the same signature as the
// C library's fma(double, double, double); confirm <cmath>/<math.h> is never
// visible in the same scope before adding it.
static inline F64_avx2 fma(double x, double y, double z) {
    const F64_avx2 x_lanes(x);
    const F64_avx2 y_lanes(y);
    const F64_avx2 z_lanes(z);
    return _mm256_fmadd_pd(x_lanes, y_lanes, z_lanes);
}
```?
I would think of a templated function
Btw, to me it's complaining about:
fma.cpp:13:41: error: cannot convert ‘__m256i’ to ‘__m256d’
13 | this->val = _mm256_castpd_si256(x);
| ^
| |
| __m256i
So I assume you wanted to write __m256d as the type there
*But then the constructor wouldn't make any sense, because you already have a constexpr one.
In general I'm very confused about your lack of const
@junior gust This would be my version:
#include <immintrin.h>
#include <concepts>
// Value wrapper for a single `__m256d`; converts implicitly in both
// directions so it can be used wherever the raw vector type is expected.
struct F64_avx2 {
    // The wrapped vector. The defaulted constructor does not set it.
    __m256d val;

    F64_avx2() = default;

    // Adopts an existing packed-double vector.
    constexpr F64_avx2(const __m256d& value) : val(value) {}

    // Hands back the raw vector.
    constexpr operator __m256d() const { return val; }

    // Broadcasts `x` into every lane.
    F64_avx2(const double x) : val(_mm256_set1_pd(x)) {}
};
// Satisfied when `Candidate` is exactly one of the `Allowed...` types.
// Mostly here for fun: fma_type could just as well be written directly as
// std::same_as<T, F64_avx2> || std::same_as<T, double>.
template <typename Candidate, typename... Allowed>
concept is_any_of = (... || std::same_as<Candidate, Allowed>);

// The argument types the generic fma accepts: the vector wrapper or a scalar.
template <typename Arg>
concept fma_type = is_any_of<Arg, F64_avx2, double>;

// Generic fused multiply-add. Each argument is turned into an F64_avx2
// (a `double` gets broadcast, an F64_avx2 is copied) before the three vectors
// are passed to `_mm256_fmadd_pd`.
template <fma_type X, fma_type Y, fma_type Z>
static inline F64_avx2 fma(const X& x, const Y& y, const Z& z) {
    const F64_avx2 x_lanes(x);
    const F64_avx2 y_lanes(y);
    const F64_avx2 z_lanes(z);
    return _mm256_fmadd_pd(x_lanes, y_lanes, z_lanes);
}
// Full specialization for three vector operands: no broadcast is needed, so
// the wrapped vectors go to the intrinsic unchanged.
template <>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
    const __m256d result = _mm256_fmadd_pd(x.val, y.val, z.val);
    return F64_avx2(result);
}
You could think about making the general version just const instead of const-ref, or you can just add a specialization for when there's 3 doubles that it only uses consts, or you can add a specialization that it should use const-refs if there's 1 or fewer doubles... Your imagination (and the fact it comes out to a maximum of "only" 8 different functions) is the limit
That is a good bit nicer
happy to hear that
C++20 concepts makes this stuff a lot less scary lol
Btw, the specialization doesn't have the static keyword, because template specializations prohibit them.
If I were to put a static on there, this would be the error you get:
fma.cpp:32:1: error: explicit template specialization cannot have a storage class
32 | static inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
| ^~~~~~
gotcha, right now I am just writing headers and things, I haven't compiled/tested things yet
hell yeah. I never properly understood how to do SFINAE, but concepts are so easy that I really don't need to worry about SFINAE
Btw, in case you don't know what SFINAE means, it means "Substitution Failure Is Not An Error": https://en.cppreference.com/w/cpp/language/sfinae
nope it's identical, just adds more syntax sugar
Well, for starters, you can declare the template function,
and provide template specializations only for the types you care about, meaning:
#include <immintrin.h>
#include <type_traits>
// Thin wrapper around one `__m256d` vector of packed doubles, implicitly
// convertible to and from the raw vector type.
struct F64_avx2 {
    // The wrapped vector. The defaulted constructor does not set it.
    __m256d val;

    F64_avx2() = default;

    // Wraps an existing packed-double vector.
    constexpr F64_avx2(__m256d value) : val(value) {}

    // Unwraps to the raw vector.
    constexpr operator __m256d() const { return val; }

    // Reinterprets the bits of an integer vector as packed doubles.
    // The body used to be commented out (the original call,
    // `_mm256_castpd_si256`, converts in the opposite direction and did not
    // compile), which left `val` unset after this constructor ran.
    // `_mm256_castsi256_pd` is the integer -> double cast.
    F64_avx2(__m256i x) : val(_mm256_castsi256_pd(x)) {}

    // Broadcasts `x` into every lane.
    F64_avx2(double x) : val(_mm256_set1_pd(x)) {}
};
// SFINAE helper: names `T` itself when `T` is F64_avx2 or double, and is
// ill-formed for every other type.
template<typename T>
using ValidT = typename std::enable_if<
    std::is_same<T, F64_avx2>::value || std::is_same<T, double>::value,
    T
>::type;
// Generic fused multiply-add. The template is unconstrained: any argument
// types from which an F64_avx2 can be explicitly constructed are accepted.
// Each argument is converted to F64_avx2 and the three vectors are passed to
// `_mm256_fmadd_pd`.
template<typename T1, typename T2, typename T3>
inline F64_avx2 fma(T1 x, T2 y, T3 z)
{
    const F64_avx2 x_lanes(x);
    const F64_avx2 y_lanes(y);
    const F64_avx2 z_lanes(z);
    return _mm256_fmadd_pd(x_lanes, y_lanes, z_lanes);
}
// Full specialization for three vector arguments: the wrapped vectors are
// passed to the intrinsic without any further conversion.
template<>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(F64_avx2 x, F64_avx2 y, F64_avx2 z)
{
    const __m256d result = _mm256_fmadd_pd(x.val, y.val, z.val);
    return F64_avx2(result);
}
// Empty entry point; present only so the snippet links as a program.
int main()
{
    return 0;
}
this template is very promiscuous and allows any one to insert any type they wish (add horny meme here), but it's a start
you can manually list the full specializations you want to allow to be explicit
But did you even read what was already posted?
If you had then you might've noticed the code I've sent:
#include <immintrin.h>
#include <concepts>
// Value wrapper for a single `__m256d`; converts implicitly in both
// directions so it can be used wherever the raw vector type is expected.
struct F64_avx2 {
    // The wrapped vector. The defaulted constructor does not set it.
    __m256d val;

    F64_avx2() = default;

    // Adopts an existing packed-double vector.
    constexpr F64_avx2(const __m256d& value) : val(value) {}

    // Hands back the raw vector.
    constexpr operator __m256d() const { return val; }

    // Broadcasts `x` into every lane.
    F64_avx2(const double x) : val(_mm256_set1_pd(x)) {}
};
// Satisfied when `Candidate` is exactly one of the `Allowed...` types.
// Mostly here for fun: fma_type could just as well be written directly as
// std::same_as<T, F64_avx2> || std::same_as<T, double>.
template <typename Candidate, typename... Allowed>
concept is_any_of = (... || std::same_as<Candidate, Allowed>);

// The argument types the generic fma accepts: the vector wrapper or a scalar.
template <typename Arg>
concept fma_type = is_any_of<Arg, F64_avx2, double>;

// Generic fused multiply-add. Each argument is turned into an F64_avx2
// (a `double` gets broadcast, an F64_avx2 is copied) before the three vectors
// are passed to `_mm256_fmadd_pd`.
template <fma_type X, fma_type Y, fma_type Z>
static inline F64_avx2 fma(const X& x, const Y& y, const Z& z) {
    const F64_avx2 x_lanes(x);
    const F64_avx2 y_lanes(y);
    const F64_avx2 z_lanes(z);
    return _mm256_fmadd_pd(x_lanes, y_lanes, z_lanes);
}
// Full specialization for three vector operands: no broadcast is needed, so
// the wrapped vectors go to the intrinsic unchanged.
template <>
inline F64_avx2 fma<F64_avx2, F64_avx2, F64_avx2>(const F64_avx2& x, const F64_avx2& y, const F64_avx2& z) {
    const __m256d result = _mm256_fmadd_pd(x.val, y.val, z.val);
    return F64_avx2(result);
}
```that also fixed a few other issues with the code, like e.g. the constructor accepting `__m256i`. That just doesn't work and makes no sense in the first place.
And my solution also fixes the
allows any one to insert any type they wish
problem
The constructor for __m256i was to save me from typing _mm256_castpd_si256, but yeah probably better to type the thing out
But did that even compile for you?
I haven't compiled/tested things yet
Oh, yeah, just remembered that
Dangerous. You should always try to build asap
Whoops, didn't see that.