16 namespace mjolnir::x86
/// @brief Return a new register with the absolute values of the input register.
///
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src Source register.
/// @return A register of the same type as `src`.
36 template <FloatVectorRegister T_RegisterType>
37 [[nodiscard]]
inline auto abs(T_RegisterType src) noexcept -> T_RegisterType;
/// @brief Return a new register consisting of the magnitudes from `src_magnitude` and the signs of
///        `src_sign`.
///
/// @tparam t_all_mag_positive Defaults to `false`. In the definition, the step that clears the masked
///         (sign) bits of `src_magnitude` is only performed when this flag is `false`.
///         NOTE(review): presumably `true` is a promise by the caller that all magnitudes are already
///         non-negative - confirm.
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src_magnitude Register that provides the magnitudes.
/// @param src_sign Register that provides the signs.
/// @return A register of the same type as the inputs.
54 template <
bool t_all_mag_positive = false, FloatVectorRegister T_RegisterType>
55 [[nodiscard]]
inline auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType;
/// @brief Return a new register with the negated elements of the source register.
///
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src Source register.
/// @return A register of the same type as `src`.
69 template <FloatVectorRegister T_RegisterType>
70 [[nodiscard]]
inline auto negate_all(T_RegisterType src) noexcept -> T_RegisterType;
/// @brief Return a copy of the source register with the selected elements being negated.
///
/// NOTE(review): the template parameter list of this declaration is not visible in this listing.
/// The definition refers to a parameter pack named `t_neg` and callers pass one value per register
/// element (e.g. `negate_selected<1, 1>(src)`) - confirm against the original header.
///
/// @param src Source register.
/// @return A register of the same type as `src`.
88 [[nodiscard]]
inline auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType;
103 namespace mjolnir::x86
/// @brief Return a new register with the absolute values of the input register.
///
/// NOTE(review): only the mask construction is visible in this listing; the declaration of `EType`
/// and the statement that applies the mask and returns the result are missing - confirm against
/// the original header.
///
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src Source register.
/// @return A register of the same type as `src`.
107 template <FloatVectorRegister T_RegisterType>
108 [[nodiscard]]
inline auto abs(T_RegisterType src) noexcept -> T_RegisterType
// Build the mask register from -0.0 converted to the element type. Presumably this is a sign-bit
// mask, since negative zero has only the sign bit set in IEEE-754 - confirm.
111 const auto mask = mm_set1<T_RegisterType>(
static_cast<EType
>(-0.0));
/// @brief Return a new register consisting of the magnitudes from `src_magnitude` and the signs of
///        `src_sign`.
///
/// NOTE(review): this listing skips some original lines (e.g. the declaration of `EType` and the
/// lines between the `if constexpr` statement and the `return`) - confirm against the original
/// header before relying on this description being complete.
///
/// @tparam t_all_mag_positive If `false`, the masked bits of `src_magnitude` are cleared before the
///         result is assembled; if `true`, that step is skipped.
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src_magnitude Register that provides the magnitudes.
/// @param src_sign Register that provides the signs.
/// @return Bitwise OR of the masked `src_sign` and the (possibly cleared) `src_magnitude`.
119 template <
bool t_all_mag_positive, FloatVectorRegister T_RegisterType>
120 [[nodiscard]]
inline auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType
// Mask register built from -0.0 converted to the element type (presumably the sign-bit mask).
123 const auto mask = mm_set1<T_RegisterType>(
static_cast<EType
>(-0.0));
// Keep only the masked bits of src_sign (bitwise AND).
125 T_RegisterType sign =
mm_and(src_sign, mask);
// mm_andnot computes (NOT mask) AND src_magnitude, i.e. it clears the masked bits of the magnitude.
// This is only done when the caller did not set t_all_mag_positive.
127 if constexpr (! t_all_mag_positive)
128 src_magnitude =
mm_andnot(mask, src_magnitude);
// Merge the extracted sign bits with the magnitude bits (bitwise OR).
131 return mm_or(sign, src_magnitude);
/// @brief Return a new register with the negated elements of the source register.
///
/// Forwards to `negate_selected` with one `1` flag per register element; the visible branches cover
/// 2, 4 and 8 flags.
///
/// @tparam T_RegisterType Register type; must satisfy the FloatVectorRegister concept.
/// @param src Source register.
/// @return The result of `negate_selected` with every flag set.
138 template <FloatVectorRegister T_RegisterType>
139 [[nodiscard]]
inline auto negate_all(T_RegisterType src) noexcept -> T_RegisterType
// Element count of the register type; selects the matching number of flags below.
141 constexpr
UST n_e = num_elements<T_RegisterType>;
143 if constexpr (n_e == 2)
144 return negate_selected<1, 1>(src);
145 else if constexpr (n_e == 4)
146 return negate_selected<1, 1, 1, 1>(src);
// NOTE(review): the listing skips a line here, presumably a trailing `else` - confirm.
148 return negate_selected<1, 1, 1, 1, 1, 1, 1, 1>(src);
/// @brief Return a copy of the source register with the selected elements being negated.
///
/// NOTE(review): this listing does not show the template parameter list (which must introduce the
/// pack `t_neg` used below), the declaration of `EType`, the condition inside the lambda, or the
/// final statement that applies the mask and returns - confirm against the original header.
///
/// @param src Source register.
/// @return A register of the same type as `src`.
156 [[nodiscard]]
inline auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType
// Element count of the register type; the number of flags in t_neg is checked against it.
159 constexpr
UST n_e = num_elements<T_RegisterType>;
// NOTE(review): the message mentions 0 as an accepted count, but the visible condition only
// accepts a pack whose size equals n_e - confirm which one is intended.
161 static_assert(
sizeof...(t_neg) == n_e,
162 "Number of boolean template parameters must be 0 or equal to the number of register elements");
// Maps one flag to a mask element: either -0. or 0. converted to EType. The condition that selects
// between the two returns is not visible here; presumably `a == true` yields -0. - confirm.
164 constexpr
auto get_mask = [](
bool a) constexpr->EType
167 return static_cast<EType
>(-0.);
168 return static_cast<EType
>(0.);
// Build the mask register from one mask element per flag in t_neg.
170 auto mask = mm_setr<T_RegisterType>(get_mask(t_neg)...);
Contains functions to compare vector registers.
std::size_t UST
Unsigned integer type that is returned by sizeof operations.
Definition: fundamental_types.h:29
auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType
Return a copy of the source register with the selected elements being negated.
Definition: sign_manipulation.h:156
auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise NOT of all elements in a and then AND with b.
Definition: intrinsics.h:576
auto negate_all(T_RegisterType src) noexcept -> T_RegisterType
Return a new register with the negated elements of the source register.
Definition: sign_manipulation.h:139
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212
auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise AND of a and b.
Definition: intrinsics.h:560
auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise XOR of a and b.
Definition: intrinsics.h:977
concept FloatVectorRegister
Concept for an x86 vector register that has floating-point elements.
Definition: definitions.h:39
auto is_memory_zero(T_RegisterType a) noexcept -> bool
Return true if the whole memory of the passed register is zero and false otherwise.
Definition: comparison.h:705
auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType
Return a new register consisting of the magnitudes from src_magnitude and the signs of src_sign.
Definition: sign_manipulation.h:120
auto abs(T_RegisterType src) noexcept -> T_RegisterType
Return a new register with the absolute values of the input register.
Definition: sign_manipulation.h:108
auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise OR of a and b.
Definition: intrinsics.h:833
Contains generalized/template versions of the x86 intrinsics.
Contains x86 vectorization specific constants, concepts and definitions.