Mjolnir Core
Core functionality of the Mjolnir API
sign_manipulation.h
Go to the documentation of this file.
1 
7 
8 #pragma once
9 
10 
11 // === DECLARATIONS ===================================================================================================
12 
14 
15 
16 namespace mjolnir::x86
17 {
20 
21 
36 template <FloatVectorRegister T_RegisterType>
37 [[nodiscard]] inline auto abs(T_RegisterType src) noexcept -> T_RegisterType;
38 
39 
54 template <bool t_all_mag_positive = false, FloatVectorRegister T_RegisterType>
55 [[nodiscard]] inline auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType;
56 
57 
69 template <FloatVectorRegister T_RegisterType>
70 [[nodiscard]] inline auto negate_all(T_RegisterType src) noexcept -> T_RegisterType;
71 
72 
87 template <bool... t_neg, FloatVectorRegister T_RegisterType>
88 [[nodiscard]] inline auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType;
89 
90 
92 } // namespace mjolnir::x86
93 
94 
95 // === DEFINITIONS ====================================================================================================
96 
99 
100 #include <cassert>
101 
102 
103 namespace mjolnir::x86
104 {
105 // --------------------------------------------------------------------------------------------------------------------
106 
107 template <FloatVectorRegister T_RegisterType>
108 [[nodiscard]] inline auto abs(T_RegisterType src) noexcept -> T_RegisterType
109 {
110  using EType = ElementType<T_RegisterType>;
111  const auto mask = mm_set1<T_RegisterType>(static_cast<EType>(-0.0));
112 
113  return mm_andnot(mask, src);
114 }
115 
116 
117 // --------------------------------------------------------------------------------------------------------------------
118 
119 template <bool t_all_mag_positive, FloatVectorRegister T_RegisterType>
120 [[nodiscard]] inline auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType
121 {
122  using EType = ElementType<T_RegisterType>;
123  const auto mask = mm_set1<T_RegisterType>(static_cast<EType>(-0.0));
124 
125  T_RegisterType sign = mm_and(src_sign, mask);
126 
127  if constexpr (! t_all_mag_positive)
128  src_magnitude = mm_andnot(mask, src_magnitude);
129 
130  assert(is_memory_zero(mm_and(src_magnitude, mask)) && "Signed bit of one or more values set."); // NOLINT
131  return mm_or(sign, src_magnitude);
132 }
133 
134 
135 // --------------------------------------------------------------------------------------------------------------------
136 
137 
138 template <FloatVectorRegister T_RegisterType>
139 [[nodiscard]] inline auto negate_all(T_RegisterType src) noexcept -> T_RegisterType
140 {
141  constexpr UST n_e = num_elements<T_RegisterType>;
142 
143  if constexpr (n_e == 2)
144  return negate_selected<1, 1>(src);
145  else if constexpr (n_e == 4)
146  return negate_selected<1, 1, 1, 1>(src);
147  else
148  return negate_selected<1, 1, 1, 1, 1, 1, 1, 1>(src);
149 }
150 
151 
152 // --------------------------------------------------------------------------------------------------------------------
153 
154 
155 template <bool... t_neg, FloatVectorRegister T_RegisterType>
156 [[nodiscard]] inline auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType
157 {
158  using EType = ElementType<T_RegisterType>;
159  constexpr UST n_e = num_elements<T_RegisterType>;
160 
161  static_assert(sizeof...(t_neg) == n_e,
162  "Number of boolean template parameters must be 0 or equal to the number of register elements");
163 
164  constexpr auto get_mask = [](bool a) constexpr->EType
165  {
166  if (a)
167  return static_cast<EType>(-0.);
168  return static_cast<EType>(0.);
169  };
170  auto mask = mm_setr<T_RegisterType>(get_mask(t_neg)...);
171 
172  return mm_xor(src, mask);
173 }
174 
175 } // namespace mjolnir::x86
Contains functions to compare vector registers.
std::size_t UST
Unsigned integer type that is returned by sizeof operations.
Definition: fundamental_types.h:29
auto negate_selected(T_RegisterType src) noexcept -> T_RegisterType
Return a copy of the source register with the selected elements being negated.
Definition: sign_manipulation.h:156
auto mm_andnot(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise NOT of all elements in a and then AND with b.
Definition: intrinsics.h:576
auto negate_all(T_RegisterType src) noexcept -> T_RegisterType
Return a new register with the negated elements of the source register.
Definition: sign_manipulation.h:139
typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 > ElementType
The element type of an x86 vector register that is based on floating-point types.
Definition: definitions.h:212
auto mm_and(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise AND of a and b.
Definition: intrinsics.h:560
auto mm_xor(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise XOR of a and b.
Definition: intrinsics.h:977
concept FloatVectorRegister
Concept for an x86 vector register that has floating-point elements.
Definition: definitions.h:39
auto is_memory_zero(T_RegisterType a) noexcept -> bool
Return true if the whole memory of the passed register is zero and false otherwise.
Definition: comparison.h:705
auto copy_sign(T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType
Return a new register consisting of the magnitudes from src_magnitude and the signs of src_sign.
Definition: sign_manipulation.h:120
auto abs(T_RegisterType src) noexcept -> T_RegisterType
Return a new register with the absolute values of the input register.
Definition: sign_manipulation.h:108
auto mm_or(T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
Compute the bitwise OR of a and b.
Definition: intrinsics.h:833
Contains generalized/template versions of the x86 intrinsics.
Contains x86 vectorization specific constants, concepts and definitions.