en/latest/intrinsics_8h_source.html

 #pragma once


 // === DECLARATIONS ===================================================================================================


 #include "mjolnir/core/x86/definitions.h"


 #include <concepts>


 // Forward declarations of thin, type-generic wrappers around x86 SSE/AVX intrinsics. Each wrapper selects the
 // intrinsic matching the passed register type at compile time. The definitions follow further below in this file.
 namespace mjolnir::x86

 {


 //! Perform an element-wise addition of `lhs` and `rhs` and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compute the bitwise AND of `a` and `b`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Compute the bitwise NOT of all elements in `a` and then AND with `b`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Blend elements from `a` and `b` using the control mask `t_mask` and return the resulting vector register.
 template <I32 t_mask, FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Broadcast the lowest floating-point element of `src` to all elements of the returned register.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType;


 //! Bit cast a floating-point vector register to an equally sized integer vector register.
 //! The return type is deduced from the intrinsic that is selected for `T_RegisterTypeIn`.
 template <FloatVectorRegister T_RegisterTypeIn>

 [[nodiscard]] inline auto mm_cast_fi(T_RegisterTypeIn src) noexcept;


 //! Bit cast an integer vector register to an equally sized floating-point vector register.
 //! `T_RegisterTypeOut` must be specified explicitly since it can't be deduced from the argument.
 template <FloatVectorRegister T_RegisterTypeOut, IntegerVectorRegister T_RegisterTypeIn>

 [[nodiscard]] inline auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut;


 //! Compare the register elements in `lhs` and `rhs` for equality and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compare element-wise if the register elements of `lhs` are greater than or equal to the ones in `rhs`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compare element-wise if the register elements of `lhs` are greater than the ones in `rhs`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compare element-wise if the register elements of `lhs` are less than or equal to the ones in `rhs`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compare element-wise if the register elements of `lhs` are less than the ones in `rhs`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Return the first element of `src`.
 //! NOTE(review): this is the only wrapper in this list that is not declared `noexcept` - confirm if intentional.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_cvt_float(T_RegisterType src) -> ElementType<T_RegisterType>;


 //! Perform an element-wise multiplication of `a` and `b`, add `c` and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType;


 //! Perform an element-wise multiplication of `a` and `b`, subtract `c` and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType;


 //! Load data from an aligned memory location into a new register.
 //! The alignment of `ptr` is checked with an `assert` in the definition.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_load(ElementType<T_RegisterType>* ptr) noexcept -> T_RegisterType;


 //! Create a mask from the most significant bit of each 8-bit element in `src` and return it as an unsigned
 //! integer. The definition returns `U16` for `__m128i`-like registers and `U32` otherwise.
 template <IntegerVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_movemask_epi8(T_RegisterType src) noexcept;


 //! Perform an element-wise multiplication of `lhs` and `rhs` and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compute the bitwise OR of `a` and `b`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Shuffle the elements in `src` using the control mask `t_mask` and return the resulting vector register.
 template <I32 t_mask, FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType;


 //! Shuffle 128-bit lanes selected by `t_mask` from `a` and `b`, and return the results in a new register.
 template <I32 t_mask, FloatAVXRegister T_RegisterType>

 [[nodiscard]] inline auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Broadcast a single value to all elements of the register.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_set1(ElementType<T_RegisterType> value) noexcept -> T_RegisterType;


 //! Set the register elements with the supplied values by forwarding them to the matching `setr` intrinsic.
 //! Every argument is converted to the element type of the register with a `static_cast`.
 template <FloatVectorRegister T_RegisterType, typename... T_Args>

 [[nodiscard]] inline auto mm_setr(T_Args... args) noexcept -> T_RegisterType;


 //! Return a vector register with all elements set to zero.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_setzero() noexcept -> T_RegisterType;


 //! Return a register with elements selected from `a` and `b` according to the control mask `t_mask`.
 template <UST t_mask, FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 //! Store the content of a register to a memory address.
 //! The alignment of `ptr` is checked with an `assert` in the definition.
 template <FloatVectorRegister T_RegisterType>

 inline void mm_store(ElementType<T_RegisterType>* ptr, T_RegisterType reg) noexcept;


 //! Subtract `rhs` element-wise from `lhs` and return the result.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;


 //! Compute the bitwise XOR of `a` and `b`.
 template <FloatVectorRegister T_RegisterType>

 [[nodiscard]] inline auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;


 } // namespace mjolnir::x86


 // === DEFINITIONS ====================================================================================================


 #include "mjolnir/core/utility/pointer_operations.h"

 #include "mjolnir/core/x86/x86.h"


 #include <cassert>

 #include <utility>


 namespace mjolnir::x86

 {

 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise addition of `lhs` and `rhs`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_add_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_add_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_add_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else
     {
         // Remaining register type accepted by the concept: 256-bit, double precision.
         return _mm256_add_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bitwise AND of `a` and `b`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_and_ps(a, b);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_and_pd(a, b);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_and_ps(a, b);
     }
     else
     {
         return _mm256_and_pd(a, b);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bitwise NOT of `a`, followed by a bitwise AND with `b`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_andnot_ps(a, b);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_andnot_pd(a, b);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_andnot_ps(a, b);
     }
     else
     {
         return _mm256_andnot_pd(a, b);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Blend the elements of `a` and `b` as selected by the compile-time control mask `t_mask`.
 //! The mask is forwarded unchanged as the immediate operand of the selected blend intrinsic.
 template <I32 t_mask, FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_blend_ps(a, b, t_mask);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_blend_pd(a, b, t_mask);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_blend_ps(a, b, t_mask);
     }
     else
     {
         return _mm256_blend_pd(a, b, t_mask);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Broadcast the lowest element of `src` to every element of the returned register.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_broadcastss_ps(src);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         // `_mm_broadcastsd_pd(src)` is currently missing in gcc - see https://stackoverflow.com/q/58270381/6700329
         // It should be fixed in gcc 11 - see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95483
         // Until then `_mm_movedup_pd` is used as a replacement.
         return _mm_movedup_pd(src);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         // The 256-bit broadcast is fed with the 128-bit cast of `src`.
         return _mm256_broadcastss_ps(_mm256_castps256_ps128(src));
     }
     else
     {
         return _mm256_broadcastsd_pd(_mm256_castpd256_pd128(src));
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bit cast the floating-point register `src` to an integer register.
 //! The return type is deduced from the cast intrinsic selected for `T_RegisterTypeIn`.
 template <FloatVectorRegister T_RegisterTypeIn>
 [[nodiscard]] inline auto mm_cast_fi(T_RegisterTypeIn src) noexcept
 {
     if constexpr (is_m128<T_RegisterTypeIn>)
     {
         return _mm_castps_si128(src);
     }
     else if constexpr (is_m128d<T_RegisterTypeIn>)
     {
         return _mm_castpd_si128(src);
     }
     else if constexpr (is_m256<T_RegisterTypeIn>)
     {
         return _mm256_castps_si256(src);
     }
     else
     {
         return _mm256_castpd_si256(src);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bit cast the integer register `src` to the floating-point register type `T_RegisterTypeOut`.
 //! The cast intrinsic is selected from the requested output type only.
 template <FloatVectorRegister T_RegisterTypeOut, IntegerVectorRegister T_RegisterTypeIn>
 [[nodiscard]] inline auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut
 {
     if constexpr (is_m128<T_RegisterTypeOut>)
     {
         return _mm_castsi128_ps(src);
     }
     else if constexpr (is_m128d<T_RegisterTypeOut>)
     {
         return _mm_castsi128_pd(src);
     }
     else if constexpr (is_m256<T_RegisterTypeOut>)
     {
         return _mm256_castsi256_ps(src);
     }
     else
     {
         return _mm256_castsi256_pd(src);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Compare the elements of `lhs` and `rhs` for equality.
 //!
 //! The returned register holds a mask per element: all bits set where the elements compare equal and all bits
 //! cleared otherwise. Elements involving NaN never compare equal (ordered comparison).
 //!
 //! The 128-bit intrinsics `_mm_cmpeq_ps/pd` use the ordered, non-signaling "equal" predicate. The 256-bit branches
 //! therefore use `_CMP_EQ_OQ` so that all register widths behave identically with respect to floating-point
 //! exceptions on quiet NaN operands. The resulting masks are the same for both predicates.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cmpeq_ps(lhs, rhs);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cmpeq_pd(lhs, rhs);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cmp_ps(lhs, rhs, _CMP_EQ_OQ);
     }
     else
     {
         return _mm256_cmp_pd(lhs, rhs, _CMP_EQ_OQ);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise test whether the elements of `lhs` are greater than or equal to the ones in `rhs`.
 //! The 256-bit branches use the generic compare intrinsic with the predicate `_CMP_GE_OS`.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cmpge_ps(lhs, rhs);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cmpge_pd(lhs, rhs);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cmp_ps(lhs, rhs, _CMP_GE_OS);
     }
     else
     {
         return _mm256_cmp_pd(lhs, rhs, _CMP_GE_OS);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise test whether the elements of `lhs` are greater than the ones in `rhs`.
 //! The 256-bit branches use the generic compare intrinsic with the predicate `_CMP_GT_OS`.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cmpgt_ps(lhs, rhs);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cmpgt_pd(lhs, rhs);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cmp_ps(lhs, rhs, _CMP_GT_OS);
     }
     else
     {
         return _mm256_cmp_pd(lhs, rhs, _CMP_GT_OS);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise test whether the elements of `lhs` are less than or equal to the ones in `rhs`.
 //! The 256-bit branches use the generic compare intrinsic with the predicate `_CMP_LE_OS`.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cmple_ps(lhs, rhs);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cmple_pd(lhs, rhs);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cmp_ps(lhs, rhs, _CMP_LE_OS);
     }
     else
     {
         return _mm256_cmp_pd(lhs, rhs, _CMP_LE_OS);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise test whether the elements of `lhs` are less than the ones in `rhs`.
 //! The 256-bit branches use the generic compare intrinsic with the predicate `_CMP_LT_OS`.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cmplt_ps(lhs, rhs);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cmplt_pd(lhs, rhs);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cmp_ps(lhs, rhs, _CMP_LT_OS);
     }
     else
     {
         return _mm256_cmp_pd(lhs, rhs, _CMP_LT_OS);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Extract the first element of `src` and return it as a scalar of the register's element type.
 //! NOTE(review): in contrast to the other wrappers in this file, this function is not `noexcept` - confirm if
 //! that is intentional. Changing it requires the declaration to be adjusted as well.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_cvt_float(T_RegisterType src) -> ElementType<T_RegisterType>
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_cvtss_f32(src);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_cvtsd_f64(src);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_cvtss_f32(src);
     }
     else
     {
         return _mm256_cvtsd_f64(src);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Fused multiply-add: multiply `a` and `b` element-wise, add `c` and return the result.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_fmadd_ps(a, b, c);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_fmadd_pd(a, b, c);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_fmadd_ps(a, b, c);
     }
     else
     {
         return _mm256_fmadd_pd(a, b, c);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Fused multiply-subtract: multiply `a` and `b` element-wise, subtract `c` and return the result.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_fmsub_ps(a, b, c);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_fmsub_pd(a, b, c);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_fmsub_ps(a, b, c);
     }
     else
     {
         return _mm256_fmsub_pd(a, b, c);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Load a register from the aligned memory location `ptr`.
 //! In builds with enabled assertions, the alignment of `ptr` is verified against
 //! `alignment_bytes<T_RegisterType>` before the load is performed.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_load(ElementType<T_RegisterType>* ptr) noexcept -> T_RegisterType
 {
     assert(is_aligned<alignment_bytes<T_RegisterType>>(ptr)); // NOLINT

     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_load_ps(ptr);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_load_pd(ptr);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_load_ps(ptr);
     }
     else
     {
         return _mm256_load_pd(ptr);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Build a bit mask from the most significant bit of each 8-bit element in `src`.
 //! The result of the intrinsic is converted to `U16` for `__m128i` registers and to `U32` otherwise, so the
 //! deduced return type depends on the register type.
 template <IntegerVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_movemask_epi8(T_RegisterType src) noexcept
 {
     if constexpr (is_m128i<T_RegisterType>)
     {
         return static_cast<U16>(_mm_movemask_epi8(src));
     }
     else
     {
         return static_cast<U32>(_mm256_movemask_epi8(src));
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise multiplication of `lhs` and `rhs`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_mul_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_mul_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_mul_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else
     {
         return _mm256_mul_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bitwise OR of `a` and `b`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_or_ps(a, b);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_or_pd(a, b);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_or_ps(a, b);
     }
     else
     {
         return _mm256_or_pd(a, b);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Shuffle the elements of `src` as described by the compile-time control mask `t_mask`.
 //! The mask is forwarded unchanged as the immediate operand of the selected permute intrinsic.
 template <I32 t_mask, FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_permute_ps(src, t_mask);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_permute_pd(src, t_mask);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_permute_ps(src, t_mask);
     }
     else
     {
         return _mm256_permute_pd(src, t_mask);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Shuffle the 128-bit lanes of `a` and `b` as selected by the compile-time control mask `t_mask`.
 //! Only register types that satisfy `FloatAVXRegister` are accepted.
 template <I32 t_mask, FloatAVXRegister T_RegisterType>
 [[nodiscard]] inline auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_permute2f128_ps(a, b, t_mask);
     }
     else
     {
         return _mm256_permute2f128_pd(a, b, t_mask);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Create a register in which every element is set to `value`.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_set1(ElementType<T_RegisterType> value) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_set1_ps(value);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_set1_pd(value);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_set1_ps(value);
     }
     else
     {
         return _mm256_set1_pd(value);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Create a register from the individual values `args` by forwarding them to the matching `setr` intrinsic.
 //! Each argument is explicitly converted to the element type of the register first. The number of arguments has
 //! to match the parameter count of the selected intrinsic, otherwise the call does not compile.
 template <FloatVectorRegister T_RegisterType, typename... T_Args>
 [[nodiscard]] inline auto mm_setr(T_Args... args) noexcept -> T_RegisterType
 {
     using EType = ElementType<T_RegisterType>;

     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_setr_ps(static_cast<EType>(args)...);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_setr_pd(static_cast<EType>(args)...);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_setr_ps(static_cast<EType>(args)...);
     }
     else
     {
         return _mm256_setr_pd(static_cast<EType>(args)...);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Create a register with all elements set to zero.
 //! The register type can't be deduced and has to be passed explicitly as template argument.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_setzero() noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_setzero_ps();
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_setzero_pd();
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_setzero_ps();
     }
     else
     {
         return _mm256_setzero_pd();
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Combine elements of `a` and `b` into a new register as selected by the compile-time control mask `t_mask`.
 //! The mask is forwarded unchanged as the immediate operand of the selected shuffle intrinsic.
 template <UST t_mask, FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_shuffle_ps(a, b, t_mask);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_shuffle_pd(a, b, t_mask);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_shuffle_ps(a, b, t_mask);
     }
     else
     {
         return _mm256_shuffle_pd(a, b, t_mask);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Write the content of `reg` to the aligned memory location `ptr`.
 //! In builds with enabled assertions, the alignment of `ptr` is verified against
 //! `alignment_bytes<T_RegisterType>` before the store is performed.
 template <FloatVectorRegister T_RegisterType>
 inline void mm_store(ElementType<T_RegisterType>* ptr, T_RegisterType reg) noexcept
 {
     assert(is_aligned<alignment_bytes<T_RegisterType>>(ptr)); // NOLINT

     if constexpr (is_m128<T_RegisterType>)
     {
         _mm_store_ps(ptr, reg);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         _mm_store_pd(ptr, reg);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         _mm256_store_ps(ptr, reg);
     }
     else
     {
         _mm256_store_pd(ptr, reg);
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Element-wise subtraction of `rhs` from `lhs`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_sub_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_sub_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_sub_ps(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
     else
     {
         return _mm256_sub_pd(lhs, rhs); // NOLINT(portability-simd-intrinsics)
     }
 }


 // --------------------------------------------------------------------------------------------------------------------


 //! Bitwise XOR of `a` and `b`.
 //! The intrinsic is chosen at compile time from the register type.
 template <FloatVectorRegister T_RegisterType>
 [[nodiscard]] inline auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 {
     if constexpr (is_m128<T_RegisterType>)
     {
         return _mm_xor_ps(a, b);
     }
     else if constexpr (is_m128d<T_RegisterType>)
     {
         return _mm_xor_pd(a, b);
     }
     else if constexpr (is_m256<T_RegisterType>)
     {
         return _mm256_xor_ps(a, b);
     }
     else
     {
         return _mm256_xor_pd(a, b);
     }
 }


 } // namespace mjolnir::x86

mjolnir::U32
std::uint32_t U32
32 bit unsigned integer type
Definition: fundamental_types.h:27

mjolnir::U16
std::uint16_t U16
16 bit unsigned integer type
Definition: fundamental_types.h:26

mjolnir::is_aligned
auto is_aligned(const volatile T_Type *pointer) noexcept -> bool
Check if a passed pointer is aligned.
Definition: pointer_operations.h:191

mjolnir::x86::mm_cvt_float
auto mm_cvt_float(T_RegisterType src) -> ElementType< T_RegisterType >
Return the first element of src.
Definition: intrinsics.h:739

mjolnir::x86::mm_sub
auto mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Subtract rhs element-wise from lhs and return the result.
Definition: intrinsics.h:961

mjolnir::x86::mm_setr
auto mm_setr(T_Args... args) noexcept -> T_RegisterType
Set register elements with the supplied values in reverse order.
Definition: intrinsics.h:893

mjolnir::x86::mm_broadcast
auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType
Broadcasts the lowest floating point element across lanes to all elements of the returned register.
Definition: intrinsics.h:608

mjolnir::x86::mm_andnot
auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise NOT of all elements in a and then AND with b.
Definition: intrinsics.h:576

mjolnir::x86::mm_cmp_le
auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are less than or equal to the ones in rhs.
Definition: intrinsics.h:707

mjolnir::x86::mm_fmsub
auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
Perform an element-wise multiplication of a and b, subtract c and return the result.
Definition: intrinsics.h:771

mjolnir::x86::mm_cast_if
auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut
Bit cast an integer vector register to an equally sized floating-point vector register.
Definition: intrinsics.h:643

mjolnir::x86::ElementType
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212

mjolnir::x86::mm_and
auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise AND of a and b.
Definition: intrinsics.h:560

mjolnir::x86::mm_fmadd
auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
Perform an element-wise multiplication of a and b, add c and return the result.
Definition: intrinsics.h:755

mjolnir::x86::mm_blend
auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Blend elements from a and b using a control mask and return the resulting vector register.
Definition: intrinsics.h:592

mjolnir::x86::mm_xor
auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise XOR of a and b.
Definition: intrinsics.h:977

mjolnir::x86::mm_shuffle
auto mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Return a register with the first half of the lane elements selected from a and the second half from b...
Definition: intrinsics.h:927

mjolnir::x86::mm_store
void mm_store(ElementType< T_RegisterType > *ptr, T_RegisterType reg) noexcept
Store the content of a register to a memory address.
Definition: intrinsics.h:943

mjolnir::x86::mm_cmp_ge
auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are greater than or equal to the ones in rhs.
Definition: intrinsics.h:675

mjolnir::x86::FloatVectorRegister
concept FloatVectorRegister
Concept for an x86 vector register that has floating-point elements.
Definition: definitions.h:39

mjolnir::x86::mm_cmp_lt
auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are less than the ones in rhs.
Definition: intrinsics.h:723

mjolnir::x86::mm_cmp_gt
auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are greater than the ones in rhs.
Definition: intrinsics.h:691

mjolnir::x86::mm_permute2f128
auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Shuffle 128-bit lanes selected by t_mask from a and b, and return the results in a new register.
Definition: intrinsics.h:865

mjolnir::x86::mm_mul
auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Perform an element-wise multiplication of lhs and rhs and return the result.
Definition: intrinsics.h:817

mjolnir::x86::mm_cmp_eq
auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare the register elements in lhs and rhs for equality and return the result.
Definition: intrinsics.h:659

mjolnir::x86::mm_movemask_epi8
auto mm_movemask_epi8(T_RegisterType src) noexcept
Create mask from the most significant bit of each 8-bit element in src, and return the result as unsi...
Definition: intrinsics.h:805

mjolnir::x86::mm_cast_fi
auto mm_cast_fi(T_RegisterTypeIn src) noexcept
Bit cast a floating-point vector register to an equally sized integer vector register.
Definition: intrinsics.h:627

mjolnir::x86::mm_add
auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Perform an element-wise addition of lhs and rhs and return the result.
Definition: intrinsics.h:544

mjolnir::x86::mm_set1
auto mm_set1(ElementType< T_RegisterType > value) noexcept -> T_RegisterType
Broadcast a single value to all elements of the register.
Definition: intrinsics.h:877

mjolnir::x86::mm_setzero
auto mm_setzero() noexcept -> T_RegisterType
Return a vector register with all elements set to zero.
Definition: intrinsics.h:911

mjolnir::x86::mm_or
auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise OR of a and b.
Definition: intrinsics.h:833

mjolnir::x86::mm_permute
auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType
Shuffle the elements in src using the control mask t_mask and return the resulting vector register.
Definition: intrinsics.h:849

mjolnir::x86::mm_load
auto mm_load(ElementType< T_RegisterType > *ptr) noexcept -> T_RegisterType
Load data from an aligned memory location into a new register.
Definition: intrinsics.h:787

definitions.h
Contains x86 vectorization specific constants, concepts and definitions.

x86.h
This header includes the correct x86 header depending on the operating system.