17 namespace mjolnir::x86
36 template <FloatVectorRegister T_RegisterType>
37 [[nodiscard]]
inline auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
53 template <FloatVectorRegister T_RegisterType>
54 [[nodiscard]]
inline auto compare_all_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
70 template <FloatVectorRegister T_RegisterType>
87 template <FloatVectorRegister T_RegisterType>
88 [[nodiscard]]
inline auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
104 template <FloatVectorRegister T_RegisterType>
105 [[nodiscard]]
inline auto compare_all_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
126 template <FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
127 [[nodiscard]]
inline auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
148 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
169 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
190 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
211 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
232 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
258 template <
UST t_idx_first,
261 std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
262 [[nodiscard]]
inline auto
283 [[nodiscard]]
inline auto compare_selected_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
343 [[nodiscard]]
inline auto compare_selected_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
387 template <
bool... t_cmp,
FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
388 [[nodiscard]]
inline auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
403 template <FloatVectorRegister T_RegisterType>
404 [[nodiscard]]
inline auto is_memory_zero(T_RegisterType a) noexcept -> bool;
423 namespace mjolnir::x86
435 template <FloatVectorRegister T_RegisterType>
438 [[nodiscard]]
inline auto operator()(T_RegisterType lhs, T_RegisterType rhs)
const noexcept -> T_RegisterType
440 return mm_cmp_eq<T_RegisterType>(lhs, rhs);
447 template <FloatVectorRegister T_RegisterType>
448 struct CompareGreater
450 [[nodiscard]]
inline auto operator()(T_RegisterType lhs, T_RegisterType rhs)
const noexcept -> T_RegisterType
452 return mm_cmp_gt<T_RegisterType>(lhs, rhs);
459 template <FloatVectorRegister T_RegisterType>
460 struct CompareGreaterEqual
462 [[nodiscard]]
inline auto operator()(T_RegisterType lhs, T_RegisterType rhs)
const noexcept -> T_RegisterType
464 return mm_cmp_ge<T_RegisterType>(lhs, rhs);
471 template <FloatVectorRegister T_RegisterType>
474 [[nodiscard]]
inline auto operator()(T_RegisterType lhs, T_RegisterType rhs)
const noexcept -> T_RegisterType
476 return mm_cmp_lt<T_RegisterType>(lhs, rhs);
483 template <FloatVectorRegister T_RegisterType>
484 struct CompareLessEqual
486 [[nodiscard]]
inline auto operator()(T_RegisterType lhs, T_RegisterType rhs)
const noexcept -> T_RegisterType
488 return mm_cmp_le<T_RegisterType>(lhs, rhs);
499 template <FloatVectorRegister T_RegisterType>
500 [[nodiscard]]
inline auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept ->
bool
502 return compare_all_true(lhs, rhs, internal::CompareEqual<T_RegisterType>());
508 template <FloatVectorRegister T_RegisterType>
511 return compare_all_true(lhs, rhs, internal::CompareGreater<T_RegisterType>());
517 template <FloatVectorRegister T_RegisterType>
520 return compare_all_true(lhs, rhs, internal::CompareGreaterEqual<T_RegisterType>());
526 template <FloatVectorRegister T_RegisterType>
527 [[nodiscard]]
inline auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept ->
bool
535 template <FloatVectorRegister T_RegisterType>
538 return compare_all_true(lhs, rhs, internal::CompareLessEqual<T_RegisterType>());
544 template <FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
545 [[nodiscard]]
inline auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
548 constexpr
UST n_e = num_elements<T_RegisterType>;
549 return compare_in_sequence_true<0, n_e>(lhs, rhs, comp_func);
555 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
558 return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareEqual<T_RegisterType>());
564 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
567 return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareGreater<T_RegisterType>());
573 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
576 return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareGreaterEqual<T_RegisterType>());
582 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
585 return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareLess<T_RegisterType>());
591 template <UST t_
idx_start, UST t_
idx_end, FloatVectorRegister T_RegisterType>
594 return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareLessEqual<T_RegisterType>());
600 template <
UST t_idx_first,
603 std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
604 [[nodiscard]]
inline auto
607 constexpr
UST n_e = num_elements<T_RegisterType>;
609 static_assert(t_length > 0,
"At least 1 element must be compared.");
610 static_assert(t_idx_first + t_length <= n_e,
"Sequence exceeds data length.");
612 constexpr
auto get_boolean_array = []() constexpr->std::array<
bool, n_e>
614 std::array<bool, n_e> arr = {{{0}}};
615 for (UST i = t_idx_first; i < t_idx_first + t_length; ++i)
619 constexpr
auto b = get_boolean_array();
622 if constexpr (is_m128d<T_RegisterType>)
623 return compare_selected_true<b[0], b[1]>(lhs, rhs, comp_func);
624 else if constexpr (is_m128<T_RegisterType> || is_m256d<T_RegisterType>)
625 return compare_selected_true<b[0], b[1], b[2], b[3]>(lhs, rhs, comp_func);
628 return compare_selected_true<b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]>(lhs, rhs, comp_func);
679 template <
bool... t_cmp,
FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
680 [[nodiscard]]
inline auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
683 constexpr
UST n_e = num_elements<T_RegisterType>;
687 static_assert(
sizeof...(t_cmp) == n_e,
"Number of template parameters must be equal to the number of elements.");
688 static_assert(! pack_all_false<t_cmp...>(),
"At least one template parameter must be `true`.");
692 constexpr
auto ref = bit_construct_from_ints<n_bits, decltype(result), (static_cast<UST>(t_cmp) * val)...>(
true);
694 if constexpr (! pack_all_true<t_cmp...>())
697 return result == ref;
704 template <FloatVectorRegister T_RegisterType>
707 constexpr
UST n_bytes =
sizeof(T_RegisterType);
708 constexpr
UST alignment = alignment_bytes<T_RegisterType>;
710 alignas(alignment) constexpr std::array<U8, n_bytes> ref = {{{0}}};
712 return !
static_cast<bool>(std::memcmp(&a, &ref, n_bytes));
Contains utility functions for bit related operations like setting and reading specific bits.
Defines the fundamental data types.
std::size_t UST
Unsigned integer type that is returned by sizeof operations.
Definition: fundamental_types.h:29
constexpr auto power_of_2(std::integral auto exponent) noexcept -> T_Type
Calculate the power of 2 using an integer based exponent.
Definition: math.h:140
auto compare_all_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than or equal to the ones of rhs.
Definition: comparison.h:518
auto compare_all_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than the ones of rhs.
Definition: comparison.h:509
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212
auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of two registers are equal.
Definition: comparison.h:500
auto compare_in_sequence_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than in rhs inside of a specified sequence.
Definition: comparison.h:565
auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the comparisons of all selected register elements yield true.
Definition: comparison.h:680
auto compare_in_sequence_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than in rhs inside of a specified sequence of indices.
Definition: comparison.h:583
auto compare_all_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than or equal to the corresponding ones in rhs.
Definition: comparison.h:536
auto compare_in_sequence_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the comparisons of all register elements are true inside of a specified sequence.
Definition: comparison.h:605
auto compare_selected_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are greater than the corresponding ones in rhs.
Definition: comparison.h:644
auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the element-wise comparisons of lhs and rhs yield true for all elements.
Definition: comparison.h:545
auto compare_in_sequence_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than or equal to those in rhs inside of a specified sequence of indices.
Definition: comparison.h:592
auto compare_selected_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are less than or equal to the corresponding ones in rhs.
Definition: comparison.h:671
auto compare_in_sequence_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of both registers are equal inside of a specified sequence of indices.
Definition: comparison.h:556
concept FloatVectorRegister
Concept for an x86 vector register that has floating-point elements.
Definition: definitions.h:39
auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than the corresponding ones in rhs.
Definition: comparison.h:527
auto is_memory_zero(T_RegisterType a) noexcept -> bool
Return true if the whole memory of the passed register is zero and false otherwise.
Definition: comparison.h:705
auto compare_selected_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected register elements are equal.
Definition: comparison.h:635
auto mm_movemask_epi8(T_RegisterType src) noexcept
Create mask from the most significant bit of each 8-bit element in src, and return the result as an unsigned integer.
Definition: intrinsics.h:805
auto mm_cast_fi(T_RegisterTypeIn src) noexcept
Bit cast a floating-point vector register to an equally sized integer vector register.
Definition: intrinsics.h:627
auto compare_selected_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are less than the corresponding ones in rhs.
Definition: comparison.h:662
auto compare_selected_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are greater than or equal to the corresponding ones in rhs.
Definition: comparison.h:653
auto compare_in_sequence_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than or equal to those in rhs inside of a specified sequence.
Definition: comparison.h:574
Contains generalized/template versions of the x86 intrinsics.
Contains basic mathematical functions.
Contains utility functions for parameter packs.
Contains x86 vectorization specific constants, concepts and definitions.
This header includes the correct x86 header depending on the operating system.