Mjolnir Core
Core functionality of the Mjolnir API
Classes | Typedefs | Functions | Variables
Core x86

This submodule of the core module provides x86 related functions and classes. More...

Classes

struct  VectorDataArray< T_RegisterType >
 A std::array of correct alignment, type and size to store all elements of a vector register type. More...
 

Typedefs

template<FloatVectorRegister T_RegisterType>
using ElementType = typename std::conditional_t< is_any_of< T_RegisterType, __m128d, __m256d >(), F64, F32 >
 The element type of an x86 vector register that is based on floating-point types. More...
 

Functions

template<FloatVectorRegister T_RegisterType>
auto abs (T_RegisterType src) noexcept -> T_RegisterType
 Return a new register with the absolute values of the input register. More...
 
template<UST t_shift, FloatVectorRegister T_RegisterType>
auto align_right ([[maybe_unused]] T_RegisterType lhs, [[maybe_unused]] T_RegisterType rhs) noexcept -> T_RegisterType
 Concatenate two floating-point registers, shift the result right by t_shift elements, and return the result. More...
 
template<UST... t_args, FloatVectorRegister T_RegisterType>
auto blend (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Blend elements from src_0 and src_1 into a new register. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto blend_above (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Get a register where elements with a higher index than t_index are copied from src_1 and the rest from src_0. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto blend_at (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Get a new register where the element with index t_index is taken from src_1 and the rest from src_0. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto blend_below (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Get a register where elements with a lower index than t_index are copied from src_1 and the rest from src_0. More...
 
template<UST t_index_first, UST t_index_last, FloatVectorRegister T_RegisterType>
auto blend_from_to (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Get a register where elements inside the specified index range are taken from src_1 and the rest from src_0. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto broadcast (T_RegisterType src) noexcept -> T_RegisterType
 Broadcast a register element per lane selected by t_index. More...
 
template<UST t_index_0, UST t_index_1, FloatAVXRegister T_RegisterType>
auto broadcast (T_RegisterType src) noexcept -> T_RegisterType
 Broadcast a register element per lane selected by t_index_0 and t_index_1. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto broadcast_across_lanes (T_RegisterType src) noexcept -> T_RegisterType
 Broadcast a register element selected by t_index across lane boundaries. More...
 
template<FloatVectorRegister T_RegisterType>
auto broadcast_element_sum (T_RegisterType src) noexcept -> T_RegisterType
 Calculate the sum of all elements of src, broadcast it into a new register and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto compare_all_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of two registers are equal. More...
 
template<FloatVectorRegister T_RegisterType>
auto compare_all_greater (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are greater than the ones of rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto compare_all_greater_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are greater than or equal to the corresponding ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto compare_all_less (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are less than the corresponding ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto compare_all_less_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are less than or equal to the corresponding ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType, std::invocable< T_RegisterType, T_RegisterType > T_CompFunc>
auto compare_all_true (T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
 Return true only if the element-wise comparisons of lhs and rhs yield true for all elements. More...
 
template<UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
auto compare_in_sequence_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of both registers are equal inside of a specified sequence of indices. More...
 
template<UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
auto compare_in_sequence_greater (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are greater than in rhs inside of a specified sequence. More...
 
template<UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
auto compare_in_sequence_greater_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are greater than or equal to those in rhs inside of a specified sequence. More...
 
template<UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
auto compare_in_sequence_less (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are less than in rhs inside of a specified sequence of indices. More...
 
template<UST t_idx_start, UST t_idx_end, FloatVectorRegister T_RegisterType>
auto compare_in_sequence_less_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all elements of lhs are less than or equal to those in rhs inside of a specified sequence of indices. More...
 
template<UST t_idx_first, UST t_length, FloatVectorRegister T_RegisterType, std::invocable< T_RegisterType, T_RegisterType > T_CompFunc>
auto compare_in_sequence_true (T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
 Return true only if the comparisons of all register elements are true inside of a specified sequence. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType>
auto compare_selected_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all selected register elements are equal. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType>
auto compare_selected_greater (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all selected elements of lhs are greater than the corresponding ones in rhs. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType>
auto compare_selected_greater_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all selected elements of lhs are greater than or equal to the corresponding ones in rhs. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType>
auto compare_selected_less (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all selected elements of lhs are less than the corresponding ones in rhs. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType>
auto compare_selected_less_equal (T_RegisterType lhs, T_RegisterType rhs) noexcept -> bool
 Return true only if all selected elements of lhs are less than or equal to the corresponding ones in rhs. More...
 
template<bool... t_cmp, FloatVectorRegister T_RegisterType, std::invocable< T_RegisterType, T_RegisterType > T_CompFunc>
auto compare_selected_true (T_RegisterType lhs, T_RegisterType rhs, T_CompFunc comp_func) noexcept -> bool
 Return true only if the comparisons of all selected register elements yield true. More...
 
template<bool t_all_mag_positive = false, FloatVectorRegister T_RegisterType>
auto copy_sign (T_RegisterType src_magnitude, T_RegisterType src_sign) noexcept -> T_RegisterType
 Return a new register consisting of the magnitudes from src_magnitude and the signs of src_sign. More...
 
template<FloatVectorRegister T_RegisterType>
auto element_sum (T_RegisterType src) noexcept -> ElementType< T_RegisterType >
 Return the sum of all elements from src. More...
 
template<UST t_num_elements, FloatVectorRegister T_RegisterType>
auto element_sum_first_n (T_RegisterType src) noexcept -> ElementType< T_RegisterType >
 Return the sum of the first t_num_elements elements from src. More...
 
template<UST t_index_0, UST t_index_1, FloatVectorRegister T_RegisterType>
void exchange (T_RegisterType &reg_0, T_RegisterType &reg_1) noexcept
 Exchange two elements selected by indices between two registers. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
auto get (T_RegisterType src) noexcept -> ElementType< T_RegisterType >
 Get the value of a specific element from a vector register. More...
 
template<FloatVectorRegister T_RegisterType>
auto get (T_RegisterType src, UST index) noexcept -> ElementType< T_RegisterType >
 Get the value of a specific element from a vector register. More...
 
template<UST t_index_src, UST t_index_dst, bool... t_set_zero>
auto insert (__m128 src, __m128 dst) noexcept -> __m128
 Insert a single element from src into dst and return the result in a new __m128 register. More...
 
template<FloatVectorRegister T_RegisterType>
auto is_memory_zero (T_RegisterType a) noexcept -> bool
 Return true if the whole memory of the passed register is zero and false otherwise. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_add (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Perform an element-wise addition of lhs and rhs and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_and (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Compute the bitwise AND of a and b. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_andnot (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Compute the bitwise NOT of all elements in a and then AND with b. More...
 
template<I32 t_mask, FloatVectorRegister T_RegisterType>
auto mm_blend (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Blend elements from a and b using a control mask and return the resulting vector register. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_broadcast (T_RegisterType src) noexcept -> T_RegisterType
 Broadcasts the lowest floating point element across lanes to all elements of the returned register. More...
 
template<FloatVectorRegister T_RegisterTypeIn>
auto mm_cast_fi (T_RegisterTypeIn src) noexcept
 Bit cast a floating-point vector register to an equally sized integer vector register. More...
 
template<FloatVectorRegister T_RegisterTypeOut, IntegerVectorRegister T_RegisterTypeIn>
auto mm_cast_if (T_RegisterTypeIn src) noexcept -> T_RegisterTypeOut
 Bit cast an integer vector register to an equally sized floating-point vector register. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cmp_eq (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Compare the register elements in lhs and rhs for equality and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cmp_ge (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Compare element-wise if the register elements of lhs are greater than or equal to the ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cmp_gt (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Compare element-wise if the register elements of lhs are greater than the ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cmp_le (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Compare element-wise if the register elements of lhs are less than or equal to the ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cmp_lt (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Compare element-wise if the register elements of lhs are less than the ones in rhs. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_cvt_float (T_RegisterType src) -> ElementType< T_RegisterType >
 Return the first element of src. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_fmadd (T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
 Perform an element-wise multiplication of a and b, add c and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_fmsub (T_RegisterType a, T_RegisterType b, T_RegisterType c) noexcept -> T_RegisterType
 Perform an element-wise multiplication of a and b, subtract c and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_load (ElementType< T_RegisterType > *ptr) noexcept -> T_RegisterType
 Load data from an aligned memory location into a new register. More...
 
template<IntegerVectorRegister T_RegisterType>
auto mm_movemask_epi8 (T_RegisterType src) noexcept
 Create mask from the most significant bit of each 8-bit element in src, and return the result as unsigned integer. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_mul (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Perform an element-wise multiplication of lhs and rhs and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_or (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Compute the bitwise OR of a and b. More...
 
template<I32 t_mask, FloatVectorRegister T_RegisterType>
auto mm_permute (T_RegisterType src) noexcept -> T_RegisterType
 Shuffle the elements in src using the control mask t_mask and return the resulting vector register. More...
 
template<I32 t_mask, FloatAVXRegister T_RegisterType>
auto mm_permute2f128 (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Shuffle 128-bit lanes selected by t_mask from a and b, and return the results in a new register. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_set1 (ElementType< T_RegisterType > value) noexcept -> T_RegisterType
 Broadcast a single value to all elements of the register. More...
 
template<FloatVectorRegister T_RegisterType, typename... T_Args>
auto mm_setr (T_Args... args) noexcept -> T_RegisterType
 Set register elements with the supplied values in reverse order. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_setzero () noexcept -> T_RegisterType
 Return a vector register with all elements set to zero. More...
 
template<UST t_mask, FloatVectorRegister T_RegisterType>
auto mm_shuffle (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Return a register with the first half of the lane elements selected from a and the second half from b. More...
 
template<FloatVectorRegister T_RegisterType>
void mm_store (ElementType< T_RegisterType > *ptr, T_RegisterType reg) noexcept
 Store the content of a register to a memory address. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_sub (T_RegisterType lhs, T_RegisterType rhs) noexcept -> T_RegisterType
 Subtract rhs element-wise from lhs and return the result. More...
 
template<FloatVectorRegister T_RegisterType>
auto mm_xor (T_RegisterType a, T_RegisterType b) noexcept -> T_RegisterType
 Compute the bitwise XOR of a and b. More...
 
template<FloatVectorRegister T_RegisterType>
auto negate_all (T_RegisterType src) noexcept -> T_RegisterType
 Return a new register with the negated elements of the source register. More...
 
template<bool... t_neg, FloatVectorRegister T_RegisterType>
auto negate_selected (T_RegisterType src) noexcept -> T_RegisterType
 Return a copy of the source register with the selected elements being negated. More...
 
template<UST... t_indices, FloatVectorRegister T_RegisterType>
auto permute (T_RegisterType src) noexcept -> T_RegisterType
 Shuffle the elements of a vector register within lanes using indices and return the result in a new register. More...
 
template<UST... t_indices, FloatVectorRegister T_RegisterType>
auto permute_across_lanes (T_RegisterType src) noexcept -> T_RegisterType
 Shuffle the elements of a vector register across lanes using indices and return the result in a new register. More...
 
template<UST t_lane_0, UST t_lane_1, FloatAVXRegister T_RegisterType>
auto permute_lanes (T_RegisterType src) noexcept -> T_RegisterType
 Create a new AVX register by an arbitrary combination of the source register's lanes. More...
 
template<UST t_index, FloatVectorRegister T_RegisterType>
void set (T_RegisterType &dst, ElementType< T_RegisterType > value) noexcept
 Set the value of a specific vector register element. More...
 
template<FloatVectorRegister T_RegisterType>
void set (T_RegisterType &dst, UST index, ElementType< T_RegisterType > value) noexcept
 Set the value of a specific vector register element. More...
 
template<UST... t_indices, FloatVectorRegister T_RegisterType>
auto shuffle (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Return a register with the first half of the lane elements selected from src_0 and the second half from src_1. More...
 
template<UST t_src_0, UST t_lane_0, UST t_src_1, UST t_lane_1, FloatAVXRegister T_RegisterType>
auto shuffle_lanes (T_RegisterType src_0, T_RegisterType src_1) noexcept -> T_RegisterType
 Create a new AVX register by combining arbitrary lanes from two source registers. More...
 
template<UST t_idx_0, UST t_idx_1, FloatVectorRegister T_RegisterType>
auto swap (T_RegisterType src) noexcept -> T_RegisterType
 Swap two elements of a register and return the result. More...
 
template<FloatAVXRegister T_RegisterType>
auto swap_lanes (T_RegisterType src) noexcept -> T_RegisterType
 Swap the lanes of an AVX register and return the result. More...
 
template<bool t_swap_lanes, FloatAVXRegister T_RegisterType>
auto swap_lanes_if (T_RegisterType src) noexcept -> T_RegisterType
 Return a new register with or without swapped lanes depending on the value of the boolean template parameter. More...
 

Variables

template<VectorRegister T_RegisterType>
constexpr UST alignment_bytes = internal::get_alignment_bytes<T_RegisterType>()
 Alignment requirement of an x86 vector register in bytes. More...
 
template<typename T_Type >
concept DoublePrecisionVectorRegister = is_any_of<T_Type, __m128d, __m256d>()
 Concept for an x86 vector register that has double precision elements. More...
 
template<typename T_Type >
concept FloatAVXRegister = is_any_of<T_Type, __m256, __m256d>()
 Concept for an x86 AVX vector register that has floating-point elements. More...
 
template<typename T_Type >
concept FloatSSERegister = is_any_of<T_Type, __m128, __m128d>()
 Concept for an x86 SSE vector register that has floating-point elements. More...
 
template<typename T_Type >
concept FloatVectorRegister = is_any_of<T_Type, __m128, __m128d, __m256, __m256d>()
 Concept for an x86 vector register that has floating-point elements. More...
 
template<typename T_Type >
concept IntegerVectorRegister = is_any_of<T_Type, __m128i, __m256i>()
 Concept for an x86 vector register that has integer elements. More...
 
template<typename T_Type >
constexpr bool is_avx_register = is_any_of<T_Type, __m256, __m256d, __m256i>()
 Type dependent constant that is only true for AVX vector registers. More...
 
template<FloatVectorRegister T_RegisterType>
constexpr bool is_double_precision = std::is_same_v<ElementType<T_RegisterType>, F64>
 true if the element type has double precision and false otherwise. More...
 
template<typename T_Type >
constexpr bool is_float_register = is_any_of<T_Type, __m128, __m128d, __m256, __m256d>()
 Type dependent constant that is only true for x86 vector registers that have floating-point types as elements. More...
 
template<typename T_Type >
constexpr bool is_integer_register = is_any_of<T_Type, __m128i, __m256i>()
 Type dependent constant that is only true for x86 vector registers that have integer types as elements. More...
 
template<typename T_Type >
constexpr bool is_m128 = std::is_same_v<T_Type, __m128>
 Type dependent constant that is only true for __m128 and false for all other types. More...
 
template<typename T_Type >
constexpr bool is_m128d = std::is_same_v<T_Type, __m128d>
 Type dependent constant that is only true for __m128d and false for all other types. More...
 
template<typename T_Type >
constexpr bool is_m128i = std::is_same_v<T_Type, __m128i>
 Type dependent constant that is only true for __m128i and false for all other types. More...
 
template<typename T_Type >
constexpr bool is_m256 = std::is_same_v<T_Type, __m256>
 Type dependent constant that is only true for __m256 and false for all other types. More...
 
template<typename T_Type >
constexpr bool is_m256d = std::is_same_v<T_Type, __m256d>
 Type dependent constant that is only true for __m256d and false for all other types. More...
 
template<typename T_Type >
constexpr bool is_m256i = std::is_same_v<T_Type, __m256i>
 Type dependent constant that is only true for __m256i and false for all other types. More...
 
template<FloatVectorRegister T_RegisterType>
constexpr bool is_multi_lane = num_lanes<T_RegisterType> > 1
 true if the register has multiple lanes and false otherwise. More...
 
template<FloatVectorRegister T_RegisterType>
constexpr bool is_single_precision = std::is_same_v<ElementType<T_RegisterType>, F32>
 true if the element type has single precision and false otherwise. More...
 
template<typename T_Type >
constexpr bool is_sse_register = is_any_of<T_Type, __m128, __m128d, __m128i>()
 Type dependent constant that is only true for SSE vector registers. More...
 
template<typename T_Type >
constexpr bool is_vector_register = is_any_of<T_Type, __m128, __m128d, __m128i, __m256, __m256d, __m256i>()
 Type dependent constant that is only true for supported x86 vector registers. More...
 
template<FloatVectorRegister T_RegisterType>
constexpr UST num_elements = sizeof(T_RegisterType) / sizeof(ElementType<T_RegisterType>)
 Number of register elements. More...
 
template<FloatVectorRegister T_RegisterType>
constexpr UST num_lane_elements = num_elements<T_RegisterType> / num_lanes<T_RegisterType>
 Number of elements per register lane. More...
 
template<VectorRegister T_RegisterType>
constexpr UST num_lanes = internal::get_num_lanes<T_RegisterType>()
 Number of register lanes. More...
 
template<typename T_Type >
concept SinglePrecisionVectorRegister = is_any_of<T_Type, __m128, __m256>()
 Concept for an x86 vector register that has single precision elements. More...
 
template<typename T_Type >
concept VectorRegister = is_any_of<T_Type, __m128, __m128d, __m128i, __m256, __m256d, __m256i>()
 Concept for an x86 vector register. More...
 

Detailed Description

This submodule of the core module provides x86 related functions and classes.