Mjolnir Core
Core functionality of the Mjolnir API
comparison.h
Go to the documentation of this file.
1 
7 
8 #pragma once
9 
12 
13 #include <concepts>
14 
15 // === DECLARATION ====================================================================================================
16 
17 namespace mjolnir::x86
18 {
21 
22 
//! @brief
//! Return `true` only if all elements of two registers are equal.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
36 template <FloatVectorRegister T_RegisterType>
37 [[nodiscard]] inline auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
38 
39 
//! @brief
//! Return `true` only if all elements of `lhs` are greater than the ones of `rhs`.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
53 template <FloatVectorRegister T_RegisterType>
54 [[nodiscard]] inline auto compare_all_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
55 
56 
//! @brief
//! Return `true` only if all elements of `lhs` are greater than or equal to the ones of `rhs`.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
70 template <FloatVectorRegister T_RegisterType>
71 [[nodiscard]] inline auto compare_all_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
72 
73 
//! @brief
//! Return `true` only if all elements of `lhs` are less than the corresponding ones in `rhs`.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
87 template <FloatVectorRegister T_RegisterType>
88 [[nodiscard]] inline auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
89 
90 
//! @brief
//! Return `true` only if all elements of `lhs` are less than or equal to the corresponding ones in `rhs`.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
104 template <FloatVectorRegister T_RegisterType>
105 [[nodiscard]] inline auto compare_all_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
106 
107 
//! @brief
//! Return `true` only if the element-wise comparison of `lhs` and `rhs` yields `true` for all elements.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @tparam T_CompFunc      Callable type that is invocable with two `T_RegisterType` arguments.
//! @param lhs        Register on the left-hand side of the comparison.
//! @param rhs        Register on the right-hand side of the comparison.
//! @param comp_func  Callable that performs the element-wise comparison.
//! @return `true` if the comparison holds for every element, `false` otherwise.
126 template <FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
127 [[nodiscard]] inline auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
128  -> bool;
129 
130 
//! @brief
//! Return `true` only if all elements of both registers are equal inside of a specified sequence of indices.
//!
//! @tparam t_idx_start     Forwarded to `compare_in_sequence_true` as its first template argument (`t_idx_first`).
//! @tparam t_idx_end       Forwarded to `compare_in_sequence_true` as its second template argument (`t_length`).
//!                         NOTE(review): confirm whether this is meant to be an end index or a sequence length.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
148 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
149 [[nodiscard]] inline auto compare_in_sequence_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
150 
151 
//! @brief
//! Return `true` only if all elements of `lhs` are greater than in `rhs` inside of a specified sequence.
//!
//! @tparam t_idx_start     Forwarded to `compare_in_sequence_true` as its first template argument (`t_idx_first`).
//! @tparam t_idx_end       Forwarded to `compare_in_sequence_true` as its second template argument (`t_length`).
//!                         NOTE(review): confirm whether this is meant to be an end index or a sequence length.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
169 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
170 [[nodiscard]] inline auto compare_in_sequence_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
171 
172 
//! @brief
//! Return `true` only if all elements of `lhs` are greater than or equal to the ones in `rhs` inside of a specified
//! sequence.
//!
//! @tparam t_idx_start     Forwarded to `compare_in_sequence_true` as its first template argument (`t_idx_first`).
//! @tparam t_idx_end       Forwarded to `compare_in_sequence_true` as its second template argument (`t_length`).
//!                         NOTE(review): confirm whether this is meant to be an end index or a sequence length.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
190 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
191 [[nodiscard]] inline auto compare_in_sequence_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
192 
193 
//! @brief
//! Return `true` only if all elements of `lhs` are less than in `rhs` inside of a specified sequence of indices.
//!
//! @tparam t_idx_start     Forwarded to `compare_in_sequence_true` as its first template argument (`t_idx_first`).
//! @tparam t_idx_end       Forwarded to `compare_in_sequence_true` as its second template argument (`t_length`).
//!                         NOTE(review): confirm whether this is meant to be an end index or a sequence length.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
211 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
212 [[nodiscard]] inline auto compare_in_sequence_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
213 
214 
//! @brief
//! Return `true` only if all elements of `lhs` are less than or equal to the ones in `rhs` inside of a specified
//! sequence.
//!
//! @tparam t_idx_start     Forwarded to `compare_in_sequence_true` as its first template argument (`t_idx_first`).
//! @tparam t_idx_end       Forwarded to `compare_in_sequence_true` as its second template argument (`t_length`).
//!                         NOTE(review): confirm whether this is meant to be an end index or a sequence length.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
232 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
233 [[nodiscard]] inline auto compare_in_sequence_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
234 
235 
//! @brief
//! Return `true` only if the comparisons of all register elements are true inside of a specified sequence.
//!
//! @tparam t_idx_first     Index of the first element of the sequence.
//! @tparam t_length        Number of elements in the sequence. Must be greater than 0 and the sequence must not
//!                         exceed the number of register elements.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @tparam T_CompFunc      Callable type that is invocable with two `T_RegisterType` arguments.
//! @param lhs        Register on the left-hand side of the comparison.
//! @param rhs        Register on the right-hand side of the comparison.
//! @param comp_func  Callable that performs the element-wise comparison.
//! @return `true` if the comparison holds for every element of the sequence, `false` otherwise.
258 template <UST t_idx_first,
259  UST t_length,
260  FloatVectorRegister T_RegisterType,
261  std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
262 [[nodiscard]] inline auto
263 compare_in_sequence_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool;
264 
265 
//! @brief
//! Return `true` only if all selected register elements are equal.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
282 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
283 [[nodiscard]] inline auto compare_selected_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
284 
285 
//! @brief
//! Return `true` only if all selected elements of `lhs` are greater than the corresponding ones in `rhs`.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
302 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
303 [[nodiscard]] inline auto compare_selected_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
304 
305 
//! @brief
//! Return `true` only if all selected elements of `lhs` are greater than or equal to the corresponding ones in `rhs`.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
322 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
323 [[nodiscard]] inline auto compare_selected_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
324 
325 
//! @brief
//! Return `true` only if all selected elements of `lhs` are less than the corresponding ones in `rhs`.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
342 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
343 [[nodiscard]] inline auto compare_selected_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
344 
345 
//! @brief
//! Return `true` only if all selected elements of `lhs` are less than or equal to the corresponding ones in `rhs`.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param lhs  Register on the left-hand side of the comparison.
//! @param rhs  Register on the right-hand side of the comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
362 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
363 [[nodiscard]] inline auto compare_selected_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool;
364 
365 
//! @brief
//! Return `true` only if the comparisons of all selected register elements yield `true`.
//!
//! @tparam t_cmp           One boolean per register element; `true` selects the element for the comparison. At least
//!                         one value must be `true`.
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @tparam T_CompFunc      Callable type that is invocable with two `T_RegisterType` arguments.
//! @param lhs        Register on the left-hand side of the comparison.
//! @param rhs        Register on the right-hand side of the comparison.
//! @param comp_func  Callable that performs the element-wise comparison.
//! @return `true` if the comparison holds for every selected element, `false` otherwise.
387 template <bool... t_cmp, FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
388 [[nodiscard]] inline auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
389  -> bool;
390 
391 
//! @brief
//! Return `true` if the whole memory of the passed register is zero and `false` otherwise.
//!
//! @tparam T_RegisterType  Register type; must satisfy `FloatVectorRegister`.
//! @param a  Register whose memory is checked.
//! @return `true` if every byte of `a` is zero, `false` otherwise.
403 template <FloatVectorRegister T_RegisterType>
404 [[nodiscard]] inline auto is_memory_zero(T_RegisterType a) noexcept -> bool;
405 
406 
408 } // namespace mjolnir::x86
409 
410 
411 // === DEFINITIONS ====================================================================================================
412 
413 
414 #include "mjolnir/core/math/math.h"
418 #include "mjolnir/core/x86/x86.h"
419 
420 #include <cstring>
421 #include <limits>
422 
423 namespace mjolnir::x86
424 {
426 namespace internal
427 {
434 
435 template <FloatVectorRegister T_RegisterType>
436 struct CompareEqual
437 {
438  [[nodiscard]] inline auto operator()(T_RegisterType lhs, T_RegisterType rhs) const noexcept -> T_RegisterType
439  {
440  return mm_cmp_eq<T_RegisterType>(lhs, rhs);
441  }
442 };
443 
444 
445 // --------------------------------------------------------
446 
447 template <FloatVectorRegister T_RegisterType>
448 struct CompareGreater
449 {
450  [[nodiscard]] inline auto operator()(T_RegisterType lhs, T_RegisterType rhs) const noexcept -> T_RegisterType
451  {
452  return mm_cmp_gt<T_RegisterType>(lhs, rhs);
453  }
454 };
455 
456 
457 // --------------------------------------------------------
458 
459 template <FloatVectorRegister T_RegisterType>
460 struct CompareGreaterEqual
461 {
462  [[nodiscard]] inline auto operator()(T_RegisterType lhs, T_RegisterType rhs) const noexcept -> T_RegisterType
463  {
464  return mm_cmp_ge<T_RegisterType>(lhs, rhs);
465  }
466 };
467 
468 
469 // --------------------------------------------------------
470 
471 template <FloatVectorRegister T_RegisterType>
472 struct CompareLess
473 {
474  [[nodiscard]] inline auto operator()(T_RegisterType lhs, T_RegisterType rhs) const noexcept -> T_RegisterType
475  {
476  return mm_cmp_lt<T_RegisterType>(lhs, rhs);
477  }
478 };
479 
480 
481 // --------------------------------------------------------
482 
483 template <FloatVectorRegister T_RegisterType>
484 struct CompareLessEqual
485 {
486  [[nodiscard]] inline auto operator()(T_RegisterType lhs, T_RegisterType rhs) const noexcept -> T_RegisterType
487  {
488  return mm_cmp_le<T_RegisterType>(lhs, rhs);
489  }
490 };
491 
492 
493 } // namespace internal
495 
496 
497 // --------------------------------------------------------------------------------------------------------------------
498 
499 template <FloatVectorRegister T_RegisterType>
500 [[nodiscard]] inline auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
501 {
502  return compare_all_true(lhs, rhs, internal::CompareEqual<T_RegisterType>());
503 }
504 
505 
506 // --------------------------------------------------------------------------------------------------------------------
507 
508 template <FloatVectorRegister T_RegisterType>
509 [[nodiscard]] inline auto compare_all_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
510 {
511  return compare_all_true(lhs, rhs, internal::CompareGreater<T_RegisterType>());
512 }
513 
514 
515 // --------------------------------------------------------------------------------------------------------------------
516 
517 template <FloatVectorRegister T_RegisterType>
518 [[nodiscard]] inline auto compare_all_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
519 {
520  return compare_all_true(lhs, rhs, internal::CompareGreaterEqual<T_RegisterType>());
521 }
522 
523 
524 // --------------------------------------------------------------------------------------------------------------------
525 
526 template <FloatVectorRegister T_RegisterType>
527 [[nodiscard]] inline auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
528 {
529  return compare_all_true(lhs, rhs, internal::CompareLess<T_RegisterType>());
530 }
531 
532 
533 // --------------------------------------------------------------------------------------------------------------------
534 
535 template <FloatVectorRegister T_RegisterType>
536 [[nodiscard]] inline auto compare_all_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
537 {
538  return compare_all_true(lhs, rhs, internal::CompareLessEqual<T_RegisterType>());
539 }
540 
541 
542 // --------------------------------------------------------------------------------------------------------------------
543 
544 template <FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
545 [[nodiscard]] inline auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
546  -> bool
547 {
548  constexpr UST n_e = num_elements<T_RegisterType>;
549  return compare_in_sequence_true<0, n_e>(lhs, rhs, comp_func);
550 }
551 
552 
553 // --------------------------------------------------------------------------------------------------------------------
554 
555 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
556 [[nodiscard]] inline auto compare_in_sequence_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
557 {
558  return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareEqual<T_RegisterType>());
559 }
560 
561 
562 // --------------------------------------------------------------------------------------------------------------------
563 
564 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
565 [[nodiscard]] inline auto compare_in_sequence_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
566 {
567  return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareGreater<T_RegisterType>());
568 }
569 
570 
571 // --------------------------------------------------------------------------------------------------------------------
572 
573 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
574 [[nodiscard]] inline auto compare_in_sequence_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
575 {
576  return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareGreaterEqual<T_RegisterType>());
577 }
578 
579 
580 // --------------------------------------------------------------------------------------------------------------------
581 
582 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
583 [[nodiscard]] inline auto compare_in_sequence_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
584 {
585  return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareLess<T_RegisterType>());
586 }
587 
588 
589 // --------------------------------------------------------------------------------------------------------------------
590 
591 template <UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
592 [[nodiscard]] inline auto compare_in_sequence_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
593 {
594  return compare_in_sequence_true<t_idx_start, t_idx_end>(lhs, rhs, internal::CompareLessEqual<T_RegisterType>());
595 }
596 
597 
598 // --------------------------------------------------------------------------------------------------------------------
599 
600 template <UST t_idx_first,
601  UST t_length,
602  FloatVectorRegister T_RegisterType,
603  std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
604 [[nodiscard]] inline auto
605 compare_in_sequence_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
606 {
607  constexpr UST n_e = num_elements<T_RegisterType>;
608 
609  static_assert(t_length > 0, "At least 1 element must be compared.");
610  static_assert(t_idx_first + t_length <= n_e, "Sequence exceeds data length.");
611 
612  constexpr auto get_boolean_array = []() constexpr->std::array<bool, n_e>
613  {
614  std::array<bool, n_e> arr = {{{0}}};
615  for (UST i = t_idx_first; i < t_idx_first + t_length; ++i)
616  arr.at(i) = true;
617  return arr;
618  };
619  constexpr auto b = get_boolean_array();
620 
621 
622  if constexpr (is_m128d<T_RegisterType>)
623  return compare_selected_true<b[0], b[1]>(lhs, rhs, comp_func);
624  else if constexpr (is_m128<T_RegisterType> || is_m256d<T_RegisterType>)
625  return compare_selected_true<b[0], b[1], b[2], b[3]>(lhs, rhs, comp_func);
626  else
627  // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers)
628  return compare_selected_true<b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]>(lhs, rhs, comp_func);
629 }
630 
631 
632 // --------------------------------------------------------------------------------------------------------------------
633 
634 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
635 [[nodiscard]] inline auto compare_selected_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
636 {
637  return compare_selected_true<t_cmp...>(lhs, rhs, internal::CompareEqual<T_RegisterType>());
638 }
639 
640 
641 // --------------------------------------------------------------------------------------------------------------------
642 
643 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
644 [[nodiscard]] inline auto compare_selected_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
645 {
646  return compare_selected_true<t_cmp...>(lhs, rhs, internal::CompareGreater<T_RegisterType>());
647 }
648 
649 
650 // --------------------------------------------------------------------------------------------------------------------
651 
652 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
653 [[nodiscard]] inline auto compare_selected_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
654 {
655  return compare_selected_true<t_cmp...>(lhs, rhs, internal::CompareGreaterEqual<T_RegisterType>());
656 }
657 
658 
659 // --------------------------------------------------------------------------------------------------------------------
660 
661 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
662 [[nodiscard]] inline auto compare_selected_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
663 {
664  return compare_selected_true<t_cmp...>(lhs, rhs, internal::CompareLess<T_RegisterType>());
665 }
666 
667 
668 // --------------------------------------------------------------------------------------------------------------------
669 
670 template <bool... t_cmp, FloatVectorRegister T_RegisterType>
671 [[nodiscard]] inline auto compare_selected_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
672 {
673  return compare_selected_true<t_cmp...>(lhs, rhs, internal::CompareLessEqual<T_RegisterType>());
674 }
675 
676 
677 // --------------------------------------------------------------------------------------------------------------------
678 
679 template <bool... t_cmp, FloatVectorRegister T_RegisterType, std::invocable<T_RegisterType, T_RegisterType> T_CompFunc>
680 [[nodiscard]] inline auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept
681  -> bool
682 {
683  constexpr UST n_e = num_elements<T_RegisterType>;
684  constexpr UST n_bits = sizeof(ElementType<T_RegisterType>);
685  constexpr UST val = power_of_2(n_bits) - 1;
686 
687  static_assert(sizeof...(t_cmp) == n_e, "Number of template parameters must be equal to the number of elements.");
688  static_assert(! pack_all_false<t_cmp...>(), "At least one template parameter must be `true`.");
689 
690 
691  auto result = mm_movemask_epi8(mm_cast_fi(comp_func(lhs, rhs)));
692  constexpr auto ref = bit_construct_from_ints<n_bits, decltype(result), (static_cast<UST>(t_cmp) * val)...>(true);
693 
694  if constexpr (! pack_all_true<t_cmp...>())
695  result &= ref; // Set bits of elements that shouldn't be compared to zero
696 
697  return result == ref;
698 }
699 
700 
701 // --------------------------------------------------------------------------------------------------------------------
702 
703 
704 template <FloatVectorRegister T_RegisterType>
705 [[nodiscard]] inline auto is_memory_zero(T_RegisterType a) noexcept -> bool
706 {
707  constexpr UST n_bytes = sizeof(T_RegisterType);
708  constexpr UST alignment = alignment_bytes<T_RegisterType>;
709 
710  alignas(alignment) constexpr std::array<U8, n_bytes> ref = {{{0}}};
711 
712  return ! static_cast<bool>(std::memcmp(&a, &ref, n_bytes));
713 }
714 
715 
716 } // namespace mjolnir::x86
Contains utility functions for bit related operations like setting and reading specific bits.
Defines the fundamental data types.
std::size_t UST
Unsigned integer type that is returned by sizeof operations.
Definition: fundamental_types.h:29
constexpr auto power_of_2(std::integral auto exponent) noexcept -> T_Type
Calculate the power of 2 using an integer based exponent.
Definition: math.h:140
auto compare_all_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than or equal to the ones of rhs.
Definition: comparison.h:518
auto compare_all_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than the ones of rhs.
Definition: comparison.h:509
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212
auto compare_all_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of two registers are equal.
Definition: comparison.h:500
auto compare_in_sequence_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater than in rhs inside of a specified sequence.
Definition: comparison.h:565
auto compare_selected_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the comparisons of all selected register elements yield true.
Definition: comparison.h:680
auto compare_in_sequence_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than in rhs inside of a specified sequence of indice...
Definition: comparison.h:583
auto compare_all_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than or equal to the corresponding ones in rhs.
Definition: comparison.h:536
auto compare_in_sequence_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the comparisons of all register elements are true inside of a specified sequence.
Definition: comparison.h:605
auto compare_selected_greater(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are greater than the corresponding ones in rhs.
Definition: comparison.h:644
auto compare_all_true(T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
Return true only if the element-wise comparisons of lhs and rhs yield true for all elements.
Definition: comparison.h:545
auto compare_in_sequence_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less equal than in rhs inside of a specified sequence of ...
Definition: comparison.h:592
auto compare_selected_less_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are less than or equal to the corresponding ones in rhs.
Definition: comparison.h:671
auto compare_in_sequence_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of both registers are equal inside of a specified sequence of indice...
Definition: comparison.h:556
concept FloatVectorRegister
Concept for a x86 vector register that has floating-point elements.
Definition: definitions.h:39
auto compare_all_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are less than the corresponding ones in rhs.
Definition: comparison.h:527
auto is_memory_zero(T_RegisterType a) noexcept -> bool
Return true if the whole memory of the passed register is zero and false otherwise.
Definition: comparison.h:705
auto compare_selected_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected register elements are equal.
Definition: comparison.h:635
auto mm_movemask_epi8(T_RegisterType src) noexcept
Create mask from the most significant bit of each 8-bit element in src, and return the result as unsi...
Definition: intrinsics.h:805
auto mm_cast_fi(T_RegisterTypeIn src) noexcept
Bit cast a floating-point vector register to an equally sized integer vector register.
Definition: intrinsics.h:627
auto compare_selected_less(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are less than the corresponding ones in rhs.
Definition: comparison.h:662
auto compare_selected_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all selected elements of lhs are greater or equal than the corresponding ones in ...
Definition: comparison.h:653
auto compare_in_sequence_greater_equal(T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
Return true only if all elements of lhs are greater or equal than in rhs inside of a specified sequen...
Definition: comparison.h:574
Contains generalized/template versions of the x86 intrinsics.
Contains basic mathematical functions.
Contains utility functions for parameter packs.
Contains x86 vectorization specific constants, concepts and definitions.
This header includes the correct x86 header depending on the operating system.