//! Declarations of the generic wrappers around the x86 vector intrinsics. Each wrapper is
//! templated on the register type; the matching definitions appear later in this listing.
17 namespace mjolnir::x86
//! @brief Perform an element-wise addition of lhs and rhs and return the result.
36 template <FloatVectorRegister T_RegisterType>
37 [[nodiscard]]
inline auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compute the bitwise AND of a and b.
53 template <FloatVectorRegister T_RegisterType>
54 [[nodiscard]]
inline auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Compute the bitwise NOT of all elements in a and then AND with b.
70 template <FloatVectorRegister T_RegisterType>
71 [[nodiscard]]
inline auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Blend elements from a and b using the control mask t_mask and return the resulting register.
90 template <I32 t_mask, FloatVectorRegister T_RegisterType>
91 [[nodiscard]]
inline auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Broadcast the lowest floating-point element of src to all elements of the returned register.
112 template <FloatVectorRegister T_RegisterType>
113 [[nodiscard]]
inline auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType;
//! @brief Bit cast a floating-point vector register to an equally sized integer vector register.
//! The return type is deduced from the definition.
129 template <FloatVectorRegister T_RegisterTypeIn>
130 [[nodiscard]]
inline auto mm_cast_fi(T_RegisterTypeIn src) noexcept;
//! @brief Bit cast an integer vector register to an equally sized floating-point vector register.
146 template <FloatVectorRegister T_RegisterTypeOut, IntegerVectorRegister T_RegisterTypeIn>
147 [[nodiscard]]
inline auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut;
//! @brief Compare the register elements in lhs and rhs for equality and return the result.
167 template <FloatVectorRegister T_RegisterType>
168 [[nodiscard]]
inline auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compare element-wise if the elements of lhs are greater than or equal to the ones in rhs.
188 template <FloatVectorRegister T_RegisterType>
189 [[nodiscard]]
inline auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compare element-wise if the elements of lhs are greater than the ones in rhs.
209 template <FloatVectorRegister T_RegisterType>
210 [[nodiscard]]
inline auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compare element-wise if the elements of lhs are less than or equal to the ones in rhs.
230 template <FloatVectorRegister T_RegisterType>
231 [[nodiscard]]
inline auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compare element-wise if the elements of lhs are less than the ones in rhs.
251 template <FloatVectorRegister T_RegisterType>
252 [[nodiscard]]
inline auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Return the first element of src.
//! NOTE(review): this is the only wrapper declared without noexcept - confirm whether that is intended.
266 template <FloatVectorRegister T_RegisterType>
267 [[nodiscard]]
inline auto mm_cvt_float(T_RegisterType src) -> ElementType<T_RegisterType>;
//! @brief Perform an element-wise multiplication of a and b, add c and return the result.
285 template <FloatVectorRegister T_RegisterType>
286 [[nodiscard]]
inline auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType;
//! @brief Perform an element-wise multiplication of a and b, subtract c and return the result.
304 template <FloatVectorRegister T_RegisterType>
305 [[nodiscard]]
inline auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType;
//! @brief Load data from an aligned memory location into a new register.
319 template <FloatVectorRegister T_RegisterType>
320 [[nodiscard]]
inline auto mm_load(ElementType<T_RegisterType>* ptr) noexcept -> T_RegisterType;
//! NOTE(review): the declaration belonging to the following template header (presumably
//! mm_movemask_epi8) is missing from this listing.
337 template <IntegerVectorRegister T_RegisterType>
//! @brief Perform an element-wise multiplication of lhs and rhs and return the result.
354 template <FloatVectorRegister T_RegisterType>
355 [[nodiscard]]
inline auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compute the bitwise OR of a and b.
371 template <FloatVectorRegister T_RegisterType>
372 [[nodiscard]]
inline auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Shuffle the elements in src using the control mask t_mask and return the resulting register.
389 template <I32 t_mask, FloatVectorRegister T_RegisterType>
390 [[nodiscard]]
inline auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType;
//! @brief Shuffle 128-bit lanes selected by t_mask from a and b, and return the results in a new register.
410 template <I32 t_mask, FloatAVXRegister T_RegisterType>
411 [[nodiscard]]
inline auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Broadcast a single value to all elements of the register.
425 template <FloatVectorRegister T_RegisterType>
426 [[nodiscard]]
inline auto mm_set1(ElementType<T_RegisterType> value) noexcept -> T_RegisterType;
//! @brief Set register elements with the supplied values in reverse order.
//! NOTE(review): the template header of this declaration is missing from this listing.
443 [[nodiscard]]
inline auto mm_setr(T_Args... args) noexcept -> T_RegisterType;
//! @brief Return a vector register with all elements set to zero.
454 template <FloatVectorRegister T_RegisterType>
455 [[nodiscard]]
inline auto mm_setzero() noexcept -> T_RegisterType;
//! @brief Return a register with the first half of the lane elements selected from a and the second
//! half from b.
//! NOTE(review): the template header of this declaration is missing from this listing.
475 [[nodiscard]] inline auto
mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
//! @brief Subtract rhs element-wise from lhs and return the result.
//! NOTE(review): the template header of this declaration is missing from this listing.
506 [[nodiscard]] inline auto
mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType;
//! @brief Compute the bitwise XOR of a and b.
//! NOTE(review): the template header of this declaration is missing from this listing.
523 [[nodiscard]] inline auto
mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType;
532 #include "mjolnir/core/utility/pointer_operations.h"
539 namespace mjolnir::x86
543 template <FloatVectorRegister T_RegisterType>
544 [[nodiscard]]
inline auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
546 if constexpr (is_m128<T_RegisterType>)
547 return _mm_add_ps(lhs, rhs);
548 else if constexpr (is_m128d<T_RegisterType>)
549 return _mm_add_pd(lhs, rhs);
550 else if constexpr (is_m256<T_RegisterType>)
551 return _mm256_add_ps(lhs, rhs);
553 return _mm256_add_pd(lhs, rhs);
559 template <FloatVectorRegister T_RegisterType>
560 [[nodiscard]]
inline auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
562 if constexpr (is_m128<T_RegisterType>)
563 return _mm_and_ps(a, b);
564 else if constexpr (is_m128d<T_RegisterType>)
565 return _mm_and_pd(a, b);
566 else if constexpr (is_m256<T_RegisterType>)
567 return _mm256_and_ps(a, b);
569 return _mm256_and_pd(a, b);
575 template <FloatVectorRegister T_RegisterType>
576 [[nodiscard]]
inline auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
578 if constexpr (is_m128<T_RegisterType>)
579 return _mm_andnot_ps(a, b);
580 else if constexpr (is_m128d<T_RegisterType>)
581 return _mm_andnot_pd(a, b);
582 else if constexpr (is_m256<T_RegisterType>)
583 return _mm256_andnot_ps(a, b);
585 return _mm256_andnot_pd(a, b);
591 template <I32 t_mask, FloatVectorRegister T_RegisterType>
592 [[nodiscard]]
inline auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
594 if constexpr (is_m128<T_RegisterType>)
595 return _mm_blend_ps(a, b, t_mask);
596 else if constexpr (is_m128d<T_RegisterType>)
597 return _mm_blend_pd(a, b, t_mask);
598 else if constexpr (is_m256<T_RegisterType>)
599 return _mm256_blend_ps(a, b, t_mask);
601 return _mm256_blend_pd(a, b, t_mask);
//! @brief Broadcast the lowest floating-point element of src to all elements of the returned register.
//!
//! The intrinsic is selected at compile time from the register type. The 256-bit variants first
//! cast src down to its 128-bit counterpart because the broadcast intrinsics take a 128-bit source.
//! NOTE(review): original lines 613-615 (inside the is_m128d branch) are missing from this listing;
//! check the original header before editing this function.
607 template <FloatVectorRegister T_RegisterType>
608 [[nodiscard]]
inline auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType
610 if constexpr (is_m128<T_RegisterType>)
611 return _mm_broadcastss_ps(src);
612 else if constexpr (is_m128d<T_RegisterType>)
// The 128-bit double case goes through _mm_movedup_pd rather than a broadcast intrinsic.
616 return _mm_movedup_pd(src);
617 else if constexpr (is_m256<T_RegisterType>)
618 return _mm256_broadcastss_ps(_mm256_castps256_ps128(src));
620 return _mm256_broadcastsd_pd(_mm256_castpd256_pd128(src));
626 template <FloatVectorRegister T_RegisterTypeIn>
627 [[nodiscard]]
inline auto mm_cast_fi(T_RegisterTypeIn src) noexcept
629 if constexpr (is_m128<T_RegisterTypeIn>)
630 return _mm_castps_si128(src);
631 else if constexpr (is_m128d<T_RegisterTypeIn>)
632 return _mm_castpd_si128(src);
633 else if constexpr (is_m256<T_RegisterTypeIn>)
634 return _mm256_castps_si256(src);
636 return _mm256_castpd_si256(src);
642 template <FloatVectorRegister T_RegisterTypeOut, IntegerVectorRegister T_RegisterTypeIn>
643 [[nodiscard]]
inline auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut
645 if constexpr (is_m128<T_RegisterTypeOut>)
646 return _mm_castsi128_ps(src);
647 else if constexpr (is_m128d<T_RegisterTypeOut>)
648 return _mm_castsi128_pd(src);
649 else if constexpr (is_m256<T_RegisterTypeOut>)
650 return _mm256_castsi256_ps(src);
652 return _mm256_castsi256_pd(src);
658 template <FloatVectorRegister T_RegisterType>
659 [[nodiscard]]
inline auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
661 if constexpr (is_m128<T_RegisterType>)
662 return _mm_cmpeq_ps(lhs, rhs);
663 else if constexpr (is_m128d<T_RegisterType>)
664 return _mm_cmpeq_pd(lhs, rhs);
665 else if constexpr (is_m256<T_RegisterType>)
666 return _mm256_cmp_ps(lhs, rhs, _CMP_EQ_OS);
668 return _mm256_cmp_pd(lhs, rhs, _CMP_EQ_OS);
674 template <FloatVectorRegister T_RegisterType>
675 [[nodiscard]]
inline auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
677 if constexpr (is_m128<T_RegisterType>)
678 return _mm_cmpge_ps(lhs, rhs);
679 else if constexpr (is_m128d<T_RegisterType>)
680 return _mm_cmpge_pd(lhs, rhs);
681 else if constexpr (is_m256<T_RegisterType>)
682 return _mm256_cmp_ps(lhs, rhs, _CMP_GE_OS);
684 return _mm256_cmp_pd(lhs, rhs, _CMP_GE_OS);
690 template <FloatVectorRegister T_RegisterType>
691 [[nodiscard]]
inline auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
693 if constexpr (is_m128<T_RegisterType>)
694 return _mm_cmpgt_ps(lhs, rhs);
695 else if constexpr (is_m128d<T_RegisterType>)
696 return _mm_cmpgt_pd(lhs, rhs);
697 else if constexpr (is_m256<T_RegisterType>)
698 return _mm256_cmp_ps(lhs, rhs, _CMP_GT_OS);
700 return _mm256_cmp_pd(lhs, rhs, _CMP_GT_OS);
706 template <FloatVectorRegister T_RegisterType>
707 [[nodiscard]]
inline auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
709 if constexpr (is_m128<T_RegisterType>)
710 return _mm_cmple_ps(lhs, rhs);
711 else if constexpr (is_m128d<T_RegisterType>)
712 return _mm_cmple_pd(lhs, rhs);
713 else if constexpr (is_m256<T_RegisterType>)
714 return _mm256_cmp_ps(lhs, rhs, _CMP_LE_OS);
716 return _mm256_cmp_pd(lhs, rhs, _CMP_LE_OS);
722 template <FloatVectorRegister T_RegisterType>
723 [[nodiscard]]
inline auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
725 if constexpr (is_m128<T_RegisterType>)
726 return _mm_cmplt_ps(lhs, rhs);
727 else if constexpr (is_m128d<T_RegisterType>)
728 return _mm_cmplt_pd(lhs, rhs);
729 else if constexpr (is_m256<T_RegisterType>)
730 return _mm256_cmp_ps(lhs, rhs, _CMP_LT_OS);
732 return _mm256_cmp_pd(lhs, rhs, _CMP_LT_OS);
//! @brief Return the first element of src.
//!
//! The conversion intrinsic is selected at compile time from the register type.
//! NOTE(review): the signature line of this definition (original line 739) is missing from this
//! listing; restore it from the original header before compiling.
738 template <FloatVectorRegister T_RegisterType>
741 if constexpr (is_m128<T_RegisterType>)
742 return _mm_cvtss_f32(src);
743 else if constexpr (is_m128d<T_RegisterType>)
744 return _mm_cvtsd_f64(src);
745 else if constexpr (is_m256<T_RegisterType>)
746 return _mm256_cvtss_f32(src);
748 return _mm256_cvtsd_f64(src);
754 template <FloatVectorRegister T_RegisterType>
755 [[nodiscard]]
inline auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
757 if constexpr (is_m128<T_RegisterType>)
758 return _mm_fmadd_ps(a, b, c);
759 else if constexpr (is_m128d<T_RegisterType>)
760 return _mm_fmadd_pd(a, b, c);
761 else if constexpr (is_m256<T_RegisterType>)
762 return _mm256_fmadd_ps(a, b, c);
764 return _mm256_fmadd_pd(a, b, c);
770 template <FloatVectorRegister T_RegisterType>
771 [[nodiscard]]
inline auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
773 if constexpr (is_m128<T_RegisterType>)
774 return _mm_fmsub_ps(a, b, c);
775 else if constexpr (is_m128d<T_RegisterType>)
776 return _mm_fmsub_pd(a, b, c);
777 else if constexpr (is_m256<T_RegisterType>)
778 return _mm256_fmsub_ps(a, b, c);
780 return _mm256_fmsub_pd(a, b, c);
//! @brief Load data from an aligned memory location into a new register.
//!
//! The load intrinsic is selected at compile time from the register type.
//! NOTE(review): the signature line of this definition (original line 787) is missing from this
//! listing; restore it from the original header before compiling.
786 template <FloatVectorRegister T_RegisterType>
// Verify via assert that ptr satisfies the alignment associated with the register type.
789 assert(
is_aligned<alignment_bytes<T_RegisterType>>(ptr));
791 if constexpr (is_m128<T_RegisterType>)
792 return _mm_load_ps(ptr);
793 else if constexpr (is_m128d<T_RegisterType>)
794 return _mm_load_pd(ptr);
795 else if constexpr (is_m256<T_RegisterType>)
796 return _mm256_load_ps(ptr);
798 return _mm256_load_pd(ptr);
//! @brief Create a mask from the most significant bit of each 8-bit element in src.
//!
//! The mask is returned as U16 for the is_m128i case and as U32 otherwise.
//! NOTE(review): the signature line of this definition (original line 805) is missing from this
//! listing; restore it from the original header before compiling.
804 template <IntegerVectorRegister T_RegisterType>
807 if constexpr (is_m128i<T_RegisterType>)
808 return static_cast<U16>(_mm_movemask_epi8(src));
810 return static_cast<U32>(_mm256_movemask_epi8(src));
816 template <FloatVectorRegister T_RegisterType>
817 [[nodiscard]]
inline auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
819 if constexpr (is_m128<T_RegisterType>)
820 return _mm_mul_ps(lhs, rhs);
821 else if constexpr (is_m128d<T_RegisterType>)
822 return _mm_mul_pd(lhs, rhs);
823 else if constexpr (is_m256<T_RegisterType>)
824 return _mm256_mul_ps(lhs, rhs);
826 return _mm256_mul_pd(lhs, rhs);
832 template <FloatVectorRegister T_RegisterType>
833 [[nodiscard]]
inline auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
835 if constexpr (is_m128<T_RegisterType>)
836 return _mm_or_ps(a, b);
837 else if constexpr (is_m128d<T_RegisterType>)
838 return _mm_or_pd(a, b);
839 else if constexpr (is_m256<T_RegisterType>)
840 return _mm256_or_ps(a, b);
842 return _mm256_or_pd(a, b);
848 template <I32 t_mask, FloatVectorRegister T_RegisterType>
849 [[nodiscard]]
inline auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType
851 if constexpr (is_m128<T_RegisterType>)
852 return _mm_permute_ps(src, t_mask);
853 else if constexpr (is_m128d<T_RegisterType>)
854 return _mm_permute_pd(src, t_mask);
855 else if constexpr (is_m256<T_RegisterType>)
856 return _mm256_permute_ps(src, t_mask);
858 return _mm256_permute_pd(src, t_mask);
864 template <I32 t_mask, FloatAVXRegister T_RegisterType>
865 [[nodiscard]]
inline auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
867 if constexpr (is_m256<T_RegisterType>)
868 return _mm256_permute2f128_ps(a, b, t_mask);
870 return _mm256_permute2f128_pd(a, b, t_mask);
//! @brief Broadcast a single value to all elements of the register.
//!
//! The set1 intrinsic is selected at compile time from the register type.
//! NOTE(review): the signature line of this definition (original line 877) is missing from this
//! listing; restore it from the original header before compiling.
876 template <FloatVectorRegister T_RegisterType>
879 if constexpr (is_m128<T_RegisterType>)
880 return _mm_set1_ps(value);
881 else if constexpr (is_m128d<T_RegisterType>)
882 return _mm_set1_pd(value);
883 else if constexpr (is_m256<T_RegisterType>)
884 return _mm256_set1_ps(value);
886 return _mm256_set1_pd(value);
//! @brief Set register elements with the supplied values in reverse order.
//!
//! Every argument is converted with static_cast<EType> and the pack is expanded into the setr
//! intrinsic selected at compile time from the register type.
//! NOTE(review): the template header and the declaration of EType are missing from this listing;
//! EType is presumably an alias of ElementType<T_RegisterType> - confirm against the original header.
893 [[nodiscard]]
inline auto mm_setr(T_Args... args) noexcept -> T_RegisterType
897 if constexpr (is_m128<T_RegisterType>)
898 return _mm_setr_ps(
static_cast<EType
>(args)...);
899 else if constexpr (is_m128d<T_RegisterType>)
900 return _mm_setr_pd(
static_cast<EType
>(args)...);
901 else if constexpr (is_m256<T_RegisterType>)
902 return _mm256_setr_ps(
static_cast<EType
>(args)...);
904 return _mm256_setr_pd(
static_cast<EType
>(args)...);
910 template <FloatVectorRegister T_RegisterType>
911 [[nodiscard]]
inline auto mm_setzero() noexcept -> T_RegisterType
913 if constexpr (is_m128<T_RegisterType>)
914 return _mm_setzero_ps();
915 else if constexpr (is_m128d<T_RegisterType>)
916 return _mm_setzero_pd();
917 else if constexpr (is_m256<T_RegisterType>)
918 return _mm256_setzero_ps();
920 return _mm256_setzero_pd();
926 template <UST t_mask, FloatVectorRegister T_RegisterType>
927 [[nodiscard]]
inline auto mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
929 if constexpr (is_m128<T_RegisterType>)
930 return _mm_shuffle_ps(a, b, t_mask);
931 else if constexpr (is_m128d<T_RegisterType>)
932 return _mm_shuffle_pd(a, b, t_mask);
933 else if constexpr (is_m256<T_RegisterType>)
934 return _mm256_shuffle_ps(a, b, t_mask);
936 return _mm256_shuffle_pd(a, b, t_mask);
//! @brief Store the content of a register to a memory address.
//!
//! The store intrinsic is selected at compile time from the register type.
//! NOTE(review): the signature line of this definition (original line 943) is missing from this
//! listing; restore it from the original header before compiling.
942 template <FloatVectorRegister T_RegisterType>
// Verify via assert that ptr satisfies the alignment associated with the register type.
945 assert(
is_aligned<alignment_bytes<T_RegisterType>>(ptr));
947 if constexpr (is_m128<T_RegisterType>)
948 _mm_store_ps(ptr, reg);
949 else if constexpr (is_m128d<T_RegisterType>)
950 _mm_store_pd(ptr, reg);
951 else if constexpr (is_m256<T_RegisterType>)
952 _mm256_store_ps(ptr, reg);
954 _mm256_store_pd(ptr, reg);
960 template <FloatVectorRegister T_RegisterType>
961 [[nodiscard]]
inline auto mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
963 if constexpr (is_m128<T_RegisterType>)
964 return _mm_sub_ps(lhs, rhs);
965 else if constexpr (is_m128d<T_RegisterType>)
966 return _mm_sub_pd(lhs, rhs);
967 else if constexpr (is_m256<T_RegisterType>)
968 return _mm256_sub_ps(lhs, rhs);
970 return _mm256_sub_pd(lhs, rhs);
976 template <FloatVectorRegister T_RegisterType>
977 [[nodiscard]]
inline auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
979 if constexpr (is_m128<T_RegisterType>)
980 return _mm_xor_ps(a, b);
981 else if constexpr (is_m128d<T_RegisterType>)
982 return _mm_xor_pd(a, b);
983 else if constexpr (is_m256<T_RegisterType>)
984 return _mm256_xor_ps(a, b);
986 return _mm256_xor_pd(a, b);
std::uint32_t U32
32 bit unsigned integer type
Definition: fundamental_types.h:27
std::uint16_t U16
16 bit unsigned integer type
Definition: fundamental_types.h:26
auto is_aligned(const volatile T_Type *pointer) noexcept -> bool
Check if a passed pointer is aligned.
Definition: pointer_operations.h:191
auto mm_cvt_float(T_RegisterType src) -> ElementType< T_RegisterType >
Return the first element of src.
Definition: intrinsics.h:739
auto mm_sub(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Subtract rhs element-wise from lhs and return the result.
Definition: intrinsics.h:961
auto mm_setr(T_Args... args) noexcept -> T_RegisterType
Set register elements with the supplied values in reverse order.
Definition: intrinsics.h:893
auto mm_broadcast(T_RegisterType src) noexcept -> T_RegisterType
Broadcasts the lowest floating point element across lanes to all elements of the returned register.
Definition: intrinsics.h:608
auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise NOT of all elements in a and then AND with b.
Definition: intrinsics.h:576
auto mm_cmp_le(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are less than or equal to the ones in rhs.
Definition: intrinsics.h:707
auto mm_fmsub(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
Perform an element-wise multiplication of a and b, subtract c and return the result.
Definition: intrinsics.h:771
auto mm_cast_if(T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut
Bit cast an integer vector register to an equally sized floating-point vector register.
Definition: intrinsics.h:643
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212
auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise AND of a and b.
Definition: intrinsics.h:560
auto mm_fmadd(T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
Perform an element-wise multiplication of a and b, add c and return the result.
Definition: intrinsics.h:755
auto mm_blend(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Blend elements from a and b using a control mask and return the resulting vector register.
Definition: intrinsics.h:592
auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise XOR of a and b.
Definition: intrinsics.h:977
auto mm_shuffle(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Return a register with the first half of the lane elements selected from a and the second half from b...
Definition: intrinsics.h:927
void mm_store(ElementType< T_RegisterType > *ptr, T_RegisterType reg) noexcept
Store the content of a register to a memory address.
Definition: intrinsics.h:943
auto mm_cmp_ge(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are greater than or equal to the ones in rhs.
Definition: intrinsics.h:675
concept FloatVectorRegister
Concept for an x86 vector register that has floating-point elements.
Definition: definitions.h:39
auto mm_cmp_lt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are less than the ones in rhs.
Definition: intrinsics.h:723
auto mm_cmp_gt(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare element-wise if the register elements of lhs are greater than the ones in rhs.
Definition: intrinsics.h:691
auto mm_permute2f128(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Shuffle 128-bit lanes selected by t_mask from a and b, and return the results in a new register.
Definition: intrinsics.h:865
auto mm_mul(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Perform an element-wise multiplication of lhs and rhs and return the result.
Definition: intrinsics.h:817
auto mm_cmp_eq(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Compare the register elements in lhs and rhs for equality and return the result.
Definition: intrinsics.h:659
auto mm_movemask_epi8(T_RegisterType src) noexcept
Create mask from the most significant bit of each 8-bit element in src, and return the result as unsi...
Definition: intrinsics.h:805
auto mm_cast_fi(T_RegisterTypeIn src) noexcept
Bit cast a floating-point vector register to an equally sized integer vector register.
Definition: intrinsics.h:627
auto mm_add(T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
Perform an element-wise addition of lhs and rhs and return the result.
Definition: intrinsics.h:544
auto mm_set1(ElementType< T_RegisterType > value) noexcept -> T_RegisterType
Broadcast a single value to all elements of the register.
Definition: intrinsics.h:877
auto mm_setzero() noexcept -> T_RegisterType
Return a vector register with all elements set to zero.
Definition: intrinsics.h:911
auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise OR of a and b.
Definition: intrinsics.h:833
auto mm_permute(T_RegisterType src) noexcept -> T_RegisterType
Shuffle the elements in src using the control mask t_mask and return the resulting vector register.
Definition: intrinsics.h:849
auto mm_load(ElementType< T_RegisterType > *ptr) noexcept -> T_RegisterType
Load data from an aligned memory location into a new register.
Definition: intrinsics.h:787
Contains x86 vectorization specific constants, concepts and definitions.
This header includes the correct x86 header depending on the operating system.