core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
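
// Illustrative sketch (editorial addition): unlike the 8/16/32-bit absolute
// value, 64-bit integer abs has no SSE/AVX2 instruction; `vpabsq` is new with
// AVX-512F (plus VL for the 128/256-bit forms below). Assumes AVX-512F and the
// crate's `assert_eq_m512i` test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_abs_epi64() {
    let a = _mm512_set_epi64(-1, 2, -3, 4, -5, 6, -7, 8);
    let r = _mm512_abs_epi64(a);
    assert_eq_m512i(r, _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8));
}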

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}
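
// Illustrative sketch (editorial addition): `simd_fabs` only clears the sign
// bit, which is why the intrinsic above compiles to a bitwise AND (`vpandd`)
// rather than an arithmetic instruction; `-0.0` becomes `+0.0` and NaN payloads
// are preserved. Assumes AVX-512F and the crate's `assert_eq_m512` test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_abs_ps_sign_bit() {
    let v = _mm512_set1_ps(-0.0);
    let r = _mm512_abs_ps(v);
    assert_eq_m512(r, _mm512_set1_ps(0.0));
}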

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}
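
// Illustrative sketch (editorial addition): `mask_mov` is a pure blend between
// `src` and `a` under the mask; it is the primitive that every masked intrinsic
// in this file reduces to via `simd_select_bitmask`. Values are arbitrary;
// assumes AVX-512F and the crate's `assert_eq_m512i` test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_mov_epi32_blend() {
    let a = _mm512_set1_epi32(1);
    let src = _mm512_set1_epi32(2);
    let r = _mm512_mask_mov_epi32(src, 0b10101010_10101010, a);
    // Odd lanes come from `a`, even lanes from `src`.
    let e = _mm512_set_epi32(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
    assert_eq_m512i(r, e);
}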

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}
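
// Illustrative sketch (editorial addition): `simd_add` is two's-complement
// wrapping addition, matching `vpaddd`; there is no overflow trap or saturation
// in these intrinsics. Assumes AVX-512F and the crate's `assert_eq_m512i`
// test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_add_epi32_wraps() {
    let a = _mm512_set1_epi32(i32::MAX);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_add_epi32(a, b);
    assert_eq_m512i(r, _mm512_set1_epi32(i32::MIN));
}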

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}
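
// Illustrative sketch (editorial addition): for the floating-point zeromask
// forms below, zeroed lanes get the all-zero bit pattern, i.e. `+0.0`, never
// `-0.0`, since the fallback operand is `f32x16::ZERO`. Assumes AVX-512F and
// the crate's `assert_eq_m512` test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_add_ps_zeroes() {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(2.5);
    let r = _mm512_maskz_add_ps(0, a, b); // empty mask: every lane zeroed
    assert_eq_m512(r, _mm512_setzero_ps());
}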
825
826/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
827///
828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
829#[inline]
830#[target_feature(enable = "avx512f")]
831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
832#[cfg_attr(test, assert_instr(vaddps))]
833pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
834    unsafe {
835        let add = _mm512_add_ps(a, b).as_f32x16();
836        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
837    }
838}
839
840/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
841///
842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
843#[inline]
844#[target_feature(enable = "avx512f")]
845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
846#[cfg_attr(test, assert_instr(vaddps))]
847pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
848    unsafe {
849        let add = _mm512_add_ps(a, b).as_f32x16();
850        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
851    }
852}
853
854/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
855///
856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
857#[inline]
858#[target_feature(enable = "avx512f,avx512vl")]
859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
860#[cfg_attr(test, assert_instr(vaddps))]
861pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
862    unsafe {
863        let add = _mm256_add_ps(a, b).as_f32x8();
864        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
865    }
866}
867
868/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
869///
870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
871#[inline]
872#[target_feature(enable = "avx512f,avx512vl")]
873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
874#[cfg_attr(test, assert_instr(vaddps))]
875pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
876    unsafe {
877        let add = _mm256_add_ps(a, b).as_f32x8();
878        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
879    }
880}
881
882/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
883///
884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
885#[inline]
886#[target_feature(enable = "avx512f,avx512vl")]
887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
888#[cfg_attr(test, assert_instr(vaddps))]
889pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
890    unsafe {
891        let add = _mm_add_ps(a, b).as_f32x4();
892        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
893    }
894}
895
896/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
897///
898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
899#[inline]
900#[target_feature(enable = "avx512f,avx512vl")]
901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
902#[cfg_attr(test, assert_instr(vaddps))]
903pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
904    unsafe {
905        let add = _mm_add_ps(a, b).as_f32x4();
906        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
907    }
908}
909
910/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
911///
912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
913#[inline]
914#[target_feature(enable = "avx512f")]
915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
916#[cfg_attr(test, assert_instr(vaddpd))]
917pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
918    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
919}
920
921/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
924#[inline]
925#[target_feature(enable = "avx512f")]
926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
927#[cfg_attr(test, assert_instr(vaddpd))]
928pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
929    unsafe {
930        let add = _mm512_add_pd(a, b).as_f64x8();
931        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
932    }
933}
934
935/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
938#[inline]
939#[target_feature(enable = "avx512f")]
940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
941#[cfg_attr(test, assert_instr(vaddpd))]
942pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
943    unsafe {
944        let add = _mm512_add_pd(a, b).as_f64x8();
945        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
946    }
947}
948
949/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
950///
951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
952#[inline]
953#[target_feature(enable = "avx512f,avx512vl")]
954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
955#[cfg_attr(test, assert_instr(vaddpd))]
956pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
957    unsafe {
958        let add = _mm256_add_pd(a, b).as_f64x4();
959        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
960    }
961}
962
963/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
966#[inline]
967#[target_feature(enable = "avx512f,avx512vl")]
968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
969#[cfg_attr(test, assert_instr(vaddpd))]
970pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
971    unsafe {
972        let add = _mm256_add_pd(a, b).as_f64x4();
973        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
974    }
975}
976
977/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
978///
979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
980#[inline]
981#[target_feature(enable = "avx512f,avx512vl")]
982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
983#[cfg_attr(test, assert_instr(vaddpd))]
984pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
985    unsafe {
986        let add = _mm_add_pd(a, b).as_f64x2();
987        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
988    }
989}
990
991/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
992///
993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
994#[inline]
995#[target_feature(enable = "avx512f,avx512vl")]
996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
997#[cfg_attr(test, assert_instr(vaddpd))]
998pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
999    unsafe {
1000        let add = _mm_add_pd(a, b).as_f64x2();
1001        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
1002    }
1003}
1004
1005/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1006///
1007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1008#[inline]
1009#[target_feature(enable = "avx512f")]
1010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1011#[cfg_attr(test, assert_instr(vpsubd))]
1012pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1013    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
1014}
1015
1016/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1017///
1018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1019#[inline]
1020#[target_feature(enable = "avx512f")]
1021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1022#[cfg_attr(test, assert_instr(vpsubd))]
1023pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1024    unsafe {
1025        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1026        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
1027    }
1028}
1029
1030/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1031///
1032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1033#[inline]
1034#[target_feature(enable = "avx512f")]
1035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1036#[cfg_attr(test, assert_instr(vpsubd))]
1037pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1038    unsafe {
1039        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1040        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
1041    }
1042}
1043
1044/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1045///
1046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1047#[inline]
1048#[target_feature(enable = "avx512f,avx512vl")]
1049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1050#[cfg_attr(test, assert_instr(vpsubd))]
1051pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1052    unsafe {
1053        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1054        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
1055    }
1056}
1057
1058/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1059///
1060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1061#[inline]
1062#[target_feature(enable = "avx512f,avx512vl")]
1063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1064#[cfg_attr(test, assert_instr(vpsubd))]
1065pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1066    unsafe {
1067        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1068        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
1069    }
1070}
1071
1072/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1073///
1074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1075#[inline]
1076#[target_feature(enable = "avx512f,avx512vl")]
1077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1078#[cfg_attr(test, assert_instr(vpsubd))]
1079pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1080    unsafe {
1081        let sub = _mm_sub_epi32(a, b).as_i32x4();
1082        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
1083    }
1084}
1085
1086/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1089#[inline]
1090#[target_feature(enable = "avx512f,avx512vl")]
1091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1092#[cfg_attr(test, assert_instr(vpsubd))]
1093pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1094    unsafe {
1095        let sub = _mm_sub_epi32(a, b).as_i32x4();
1096        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
1097    }
1098}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}
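
// Illustrative sketch (not upstream code): mask bits select lanes LSB-first,
// so 0b1111_0000 computes lanes 4..7 and copies lanes 0..3 from `src`. The
// `sketch_*` name is hypothetical and `assert_eq_m512i` is the crate's own
// test helper; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_mask_sub_epi64() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(9);
    let b = _mm512_set1_epi64(2);
    let r = _mm512_mask_sub_epi64(src, 0b1111_0000, a, b);
    // Lanes 0..3 keep -1 from `src`; lanes 4..7 hold 9 - 2.
    let e = _mm512_setr_epi64(-1, -1, -1, -1, 7, 7, 7, 7);
    assert_eq_m512i(r, e);
}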

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
    }
}
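
// Illustrative sketch (not upstream code): a `__mmask16` covers all sixteen
// f32 lanes of a 512-bit vector; the alternating mask below keeps the even
// lanes and zeroes the odd ones. Hypothetical `sketch_*` name; test builds
// only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_maskz_sub_ps() {
    let a = _mm512_set1_ps(5.0);
    let b = _mm512_set1_ps(1.5);
    let r = _mm512_maskz_sub_ps(0b0101_0101_0101_0101, a, b);
    let e = _mm512_setr_ps(
        3.5, 0.0, 3.5, 0.0, 3.5, 0.0, 3.5, 0.0,
        3.5, 0.0, 3.5, 0.0, 3.5, 0.0, 3.5, 0.0,
    );
    assert_eq_m512(r, e);
}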

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
    }
}
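
// Illustrative sketch (not upstream code): AVX-512VL applies the same masking
// model to 256-bit vectors; the four f64 lanes consume the low four mask bits.
// Hypothetical `sketch_*` name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mm256_mask_sub_pd() {
    let src = _mm256_set1_pd(9.0);
    let a = _mm256_set1_pd(4.0);
    let b = _mm256_set1_pd(1.0);
    // Lanes 0 and 1 compute 4.0 - 1.0; lanes 2 and 3 keep `src`.
    let r = _mm256_mask_sub_pd(src, 0b0011, a, b);
    assert_eq_m256d(r, _mm256_setr_pd(3.0, 3.0, 9.0, 9.0));
}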

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
    }
}
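
// Illustrative sketch (not upstream code): only the low 32 bits of each
// 64-bit lane participate in `_mm512_mul_epi32`, interpreted as *signed*
// values; the high halves are ignored entirely. Hypothetical `sketch_*` name;
// test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_mul_epi32() {
    // High half 7 (ignored), low half 0xFFFF_FFFE, i.e. -2 as an i32.
    let a = _mm512_set1_epi64((7i64 << 32) | 0xFFFF_FFFE);
    let b = _mm512_set1_epi64(3);
    let r = _mm512_mul_epi32(a, b);
    // -2 * 3, sign-extended to 64 bits.
    assert_eq_m512i(r, _mm512_set1_epi64(-6));
}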

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
}
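
// Illustrative sketch (not upstream code): `mullo` keeps only the low 32 bits
// of each 64-bit intermediate product, so the result wraps like a plain
// `i32::wrapping_mul`. Hypothetical `sketch_*` name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_mullo_epi32() {
    let a = _mm512_set1_epi32(i32::MAX);
    let b = _mm512_set1_epi32(2);
    // i32::MAX * 2 needs 33 bits; the kept low 32 bits read back as -2.
    let r = _mm512_mullo_epi32(a, b);
    assert_eq_m512i(r, _mm512_set1_epi32(-2));
}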

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
    }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}
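
// Illustrative sketch (not upstream code): unlike `_mm512_mul_epi32`, the
// `mullox` emulation multiplies full 64-bit lanes and keeps the low 64 bits
// of each product. Hypothetical `sketch_*` name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_mullox_epi64() {
    // A value whose low 32 bits are zero: `_mm512_mul_epi32` would yield 0 here.
    let a = _mm512_set1_epi64(1 << 33);
    let b = _mm512_set1_epi64(10);
    let r = _mm512_mullox_epi64(a, b);
    assert_eq_m512i(r, _mm512_set1_epi64(10 << 33));
}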

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        let mask = u64x8::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
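
// Illustrative sketch (not upstream code): the same low-32-bit multiply as
// `_mm512_mul_epi32`, but the halves are treated as *unsigned*, so an
// all-ones low half is 4_294_967_295 rather than -1. Hypothetical `sketch_*`
// name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_mul_epu32() {
    let a = _mm512_set1_epi64(u32::MAX as i64);
    let b = _mm512_set1_epi64(2);
    let r = _mm512_mul_epu32(a, b);
    // 4_294_967_295 * 2 = 0x1_FFFF_FFFE, zero-extended into the 64-bit lane.
    assert_eq_m512i(r, _mm512_set1_epi64(0x1_FFFF_FFFE));
}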

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}
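
// Illustrative sketch (not upstream code): zero-masking a 512-bit f32
// multiply; only the low four of the sixteen mask bits are set, so lanes
// 4..15 come back as +0.0. Hypothetical `sketch_*` name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_maskz_mul_ps() {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(2.0);
    let r = _mm512_maskz_mul_ps(0b0000_0000_0000_1111, a, b);
    let e = _mm512_setr_ps(
        6.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    );
    assert_eq_m512(r, e);
}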

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}
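
// Illustrative sketch (not upstream code): the division is an ordinary IEEE
// divide, so an unmasked lane may produce infinity, while masked-off lanes
// are simply zeroed in the result. Hypothetical `sketch_*` name; test builds
// only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_maskz_div_ps() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_setr_ps(
        0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
        2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
    );
    // Lane 0: 1.0 / 0.0 == +inf; lane 1: 0.5; lanes 2..15 are masked to zero.
    let r = _mm512_maskz_div_ps(0b0000_0000_0000_0011, a, b);
    let e = _mm512_setr_ps(
        f32::INFINITY, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    );
    assert_eq_m512(r, e);
}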

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
    }
}
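
// Illustrative sketch (not upstream code): the comparison is signed, which is
// the point of the `epi32` (rather than `epu32`) variant: -1 loses to 1,
// whereas an unsigned max would see -1 as 0xFFFF_FFFF and pick it.
// Hypothetical `sketch_*` name; test builds only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_mm512_max_epi32() {
    let a = _mm512_set1_epi32(-1);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_max_epi32(a, b);
    assert_eq_m512i(r, _mm512_set1_epi32(1));
}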
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109    unsafe {
2110        let max = _mm512_max_epi32(a, b).as_i32x16();
2111        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112    }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123    unsafe {
2124        let max = _mm512_max_epi32(a, b).as_i32x16();
2125        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126    }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
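///
/// A minimal sketch with illustrative values, assuming runtime AVX-512F support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_epi64(i64::MIN);
///             let b = _mm512_set1_epi64(-1);
///             let mut out = [0i64; 8];
///             _mm512_storeu_epi64(out.as_mut_ptr(), _mm512_max_epi64(a, b));
///             assert_eq!(out, [-1; 8]); // signed comparison, so -1 > i64::MIN
///         }
///     }
/// }
/// ```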
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
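///
/// A minimal sketch with illustrative values, assuming runtime AVX-512F support
/// (note that, per the underlying `VMAXPS` definition, the second operand is
/// returned when a lane compares unordered, e.g. when it holds a NaN; that case
/// is not exercised here):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             let b = _mm512_set1_ps(-2.0);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), _mm512_max_ps(a, b));
///             assert_eq!(out, [1.5; 16]);
///         }
///     }
/// }
/// ```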
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vmaxps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
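///
/// A sketch contrasting unsigned with signed comparison (illustrative values,
/// assuming runtime AVX-512F support):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // The bit pattern of -1 is u32::MAX when read as unsigned.
///             let a = _mm512_set1_epi32(-1);
///             let b = _mm512_set1_epi32(1);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), _mm512_max_epu32(a, b));
///             assert_eq!(out, [-1; 16]); // 0xFFFF_FFFF is the unsigned maximum
///         }
///     }
/// }
/// ```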
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
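///
/// A minimal sketch with illustrative values, assuming runtime AVX-512F support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let b = _mm512_set1_epi32(6);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), _mm512_min_epi32(a, b));
///             // Each lane is clamped from above by 6.
///             assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]);
///         }
///     }
/// }
/// ```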
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
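///
/// Packed 64-bit integer minima have no SSE counterpart, so even this 128-bit
/// form requires AVX512F together with AVX512VL; a minimal sketch with
/// illustrative values:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm_set_epi64x(10, -10); // lanes: [-10, 10]
///             let b = _mm_set_epi64x(-20, 20); // lanes: [20, -20]
///             let mut out = [0i64; 2];
///             _mm_storeu_epi64(out.as_mut_ptr(), _mm_min_epi64(a, b));
///             assert_eq!(out, [-10, -20]);
///         }
///     }
/// }
/// ```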
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
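///
/// A minimal sketch with illustrative values, assuming runtime AVX-512F support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(0.5);
///             let b = _mm512_set1_ps(0.25);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), _mm512_min_ps(a, b));
///             assert_eq!(out, [0.25; 16]);
///         }
///     }
/// }
/// ```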
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vminps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
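///
/// A sketch contrasting unsigned with signed comparison (illustrative values,
/// assuming runtime AVX-512F support):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // Read as unsigned, -1 is u32::MAX, so it loses the minimum.
///             let a = _mm512_set1_epi32(-1);
///             let b = _mm512_set1_epi32(7);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), _mm512_min_epu32(a, b));
///             assert_eq!(out, [7; 16]);
///         }
///     }
/// }
/// ```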
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
    }
}
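
// A minimal usage sketch of the masked unsigned minimum (hypothetical: the
// module and test names below are invented, and the harness mirrors the
// `simd_test`-based tests this crate keeps for these intrinsics). An all-ones
// bit pattern compares as u32::MAX under unsigned ordering, and lanes with a
// clear mask bit are copied from `src`.
#[cfg(test)]
mod min_epu32_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn mask_min_epu32_copies_unselected_lanes() {
        let a = _mm_set1_epi32(-1); // bit pattern u32::MAX when viewed unsigned
        let b = _mm_set1_epi32(1);
        let src = _mm_set1_epi32(9);
        // Mask 0b0101 selects lanes 0 and 2; lanes 1 and 3 come from `src`.
        let r = _mm_mask_min_epu32(src, 0b0101, a, b);
        assert_eq_m128i(r, _mm_setr_epi32(1, 9, 1, 9));
    }
}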

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
    }
}
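
// A hedged sketch (hypothetical test, same invented-name caveat as above) of
// why the epu64 family differs from a signed epi64 minimum: -1 reinterpreted
// as u64 is the largest value, so the unsigned minimum picks the other operand.
#[cfg(test)]
mod min_epu64_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn min_epu64_uses_unsigned_ordering() {
        let a = _mm512_set1_epi64(-1); // u64::MAX as an unsigned bit pattern
        let b = _mm512_set1_epi64(0);
        let r = _mm512_min_epu64(a, b);
        // Unsigned: 0 < u64::MAX, so every lane is 0 (a signed min would give -1).
        assert_eq_m512i(r, _mm512_set1_epi64(0));
    }
}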

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    unsafe { simd_fsqrt(a) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
}
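
// A hypothetical sketch (invented names, `simd_test` convention as above) of
// the zeromask behavior: lanes with a clear mask bit are zeroed rather than
// copied, so no `src` operand is needed.
#[cfg(test)]
mod sqrt_ps_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn maskz_sqrt_ps_zeroes_unselected_lanes() {
        let a = _mm512_set1_ps(4.0);
        // Only the high 8 of 16 lanes are computed; the low 8 become 0.0.
        let r = _mm512_maskz_sqrt_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
}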

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    unsafe { simd_fsqrt(a) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
}
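
// A hypothetical sketch of the double-precision writemask variant on a
// 256-bit vector (this one also needs avx512vl): unselected lanes pass `src`
// through untouched, even when `src` holds values sqrt could never produce.
#[cfg(test)]
mod sqrt_pd_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn mask_sqrt_pd_keeps_src_lanes() {
        let a = _mm256_set1_pd(9.0);
        let src = _mm256_set1_pd(-5.0);
        // Mask 0b0011 computes lanes 0 and 1; lanes 2 and 3 copy from `src`.
        let r = _mm256_mask_sqrt_pd(src, 0b0011, a);
        assert_eq_m256d(r, _mm256_setr_pd(3.0, 3.0, -5.0, -5.0));
    }
}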

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132ps or vfmadd213ps or vfmadd231ps
pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
}
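
// A hypothetical sketch contrasting the three FMA mask variants: `mask`
// copies unselected lanes from `a`, `mask3` copies them from `c`, and `maskz`
// zeroes them. With an all-zero mask the copy source comes back unchanged.
#[cfg(test)]
mod fmadd_ps_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_variants_copy_from_different_operands() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        // No lane is selected, so no 2*3 + 1 result survives anywhere.
        assert_eq_m512(_mm512_mask_fmadd_ps(a, 0, b, c), a);
        assert_eq_m512(_mm512_mask3_fmadd_ps(a, b, c, 0), c);
        assert_eq_m512(_mm512_maskz_fmadd_ps(0, a, b, c), _mm512_setzero_ps());
    }
}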

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] // vfmadd132pd or vfmadd213pd or vfmadd231pd
pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
}
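
// A hedged numeric sketch (hypothetical constants) of the fused multiply-add
// rounding only once: (1 + 2^-27)^2 - 1 keeps its 2^-54 term under FMA, while
// a separate multiply rounds that term away before the subtraction.
#[cfg(test)]
mod fmadd_pd_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn fmadd_pd_rounds_once() {
        let small = 1.0 / ((1u64 << 27) as f64); // 2^-27, exactly representable
        let x = _mm512_set1_pd(1.0 + small);
        let minus_one = _mm512_set1_pd(-1.0);
        let fused = _mm512_fmadd_pd(x, x, minus_one);
        let unfused = _mm512_add_pd(_mm512_mul_pd(x, x), minus_one);
        // All 8 lanes differ: the fused result is 2^-26 + 2^-54, while the
        // unfused one is exactly 2^-26.
        assert_eq!(_mm512_cmpneq_pd_mask(fused, unfused), 0xff);
    }
}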

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, simd_neg(c)) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
}
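
// A hypothetical sketch: fmsub is implemented above as fmadd with `c`
// negated, so fmsub(a, b, c) computes a*b - c lane by lane.
#[cfg(test)]
mod fmsub_ps_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn fmsub_ps_subtracts_c() {
        let a = _mm512_set1_ps(3.0);
        let b = _mm512_set1_ps(5.0);
        let c = _mm512_set1_ps(4.0);
        // 3 * 5 - 4 = 11 in every lane.
        assert_eq_m512(_mm512_fmsub_ps(a, b, c), _mm512_set1_ps(11.0));
    }
}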

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, simd_neg(c)) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] // vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
}
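
// A hypothetical sketch of the double-precision zeromask variant: selected
// lanes hold a*b - c, unselected lanes are zeroed.
#[cfg(test)]
mod fmsub_pd_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn maskz_fmsub_pd_zeroes_high_lanes() {
        let a = _mm512_set1_pd(2.0);
        let b = _mm512_set1_pd(3.0);
        let c = _mm512_set1_pd(1.0);
        // Low 4 lanes: 2 * 3 - 1 = 5; high 4 lanes zeroed.
        let r = _mm512_maskz_fmsub_pd(0b0000_1111, a, b, c);
        assert_eq_m512d(r, _mm512_setr_pd(5.0, 5.0, 5.0, 5.0, 0.0, 0.0, 0.0, 0.0));
    }
}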

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] // vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        let add = simd_fma(a, b, c);
        let sub = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(
            add,
            sub,
            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
        )
    }
}
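
// In the shuffle above, indices 16 and up select from the second operand
// (`sub`), so even result lanes subtract and odd result lanes add. A
// hypothetical sketch of the resulting lane pattern:
#[cfg(test)]
mod fmaddsub_ps_usage_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn even_lanes_subtract_odd_lanes_add() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        let r = _mm512_fmaddsub_ps(a, b, c);
        // Lane 0: 2*3 - 1 = 5; lane 1: 2*3 + 1 = 7; the pair repeats.
        let e = _mm512_setr_ps(
            5., 7., 5., 7., 5., 7., 5., 7., 5., 7., 5., 7., 5., 7., 5., 7.,
        );
        assert_eq_m512(r, e);
    }
}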
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add = simd_fma(a, b, c);
        let sub = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}
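// In the shuffle above, indices 0..=7 select lanes of `add` and 8..=15 select
// lanes of `sub`, so even result lanes subtract `c` and odd lanes add it.
// A minimal usage sketch (illustrative values, not from the original source;
// assumes AVX-512F support has been confirmed, e.g. via
// `is_x86_feature_detected!("avx512f")`):
//
//     let a = _mm512_set1_pd(2.0);
//     let b = _mm512_set1_pd(3.0);
//     let c = _mm512_set1_pd(1.0);
//     let r = _mm512_fmaddsub_pd(a, b, c);
//     // even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0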

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
}
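// Continuing the sketch above with a hypothetical mask value: lanes whose bit
// in `k` is set receive the fused result, the rest keep the lane from `a`:
//
//     let r = _mm512_mask_fmaddsub_pd(a, 0b0000_1010, b, c);
//     // lanes 1 and 3 hold fmaddsub results; the other lanes hold a's lanes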

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
}
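// Zeromask sketch (hypothetical mask value): unselected lanes become 0.0
// instead of being copied from an input operand:
//
//     let r = _mm512_maskz_fmaddsub_pd(0b0000_1010, a, b, c);
//     // lanes 1 and 3 hold fmaddsub results; all other lanes are 0.0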

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
}
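// The mask3 form differs from the mask form only in its fallback operand:
// unselected lanes are copied from `c` rather than from `a`:
//
//     let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b0000_1010);
//     // lanes 1 and 3 hold fmaddsub results; the other lanes hold c's lanes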

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        let add = simd_fma(a, b, c);
        let sub = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(
            add,
            sub,
            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
        )
    }
}
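// fmsubadd mirrors fmaddsub: here even lanes take the add and odd lanes the
// subtract. A sketch with illustrative values (assumes AVX-512F support):
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     let r = _mm512_fmsubadd_ps(a, b, c);
//     // even lanes: 2.0 * 3.0 + 1.0 = 7.0; odd lanes: 2.0 * 3.0 - 1.0 = 5.0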

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add = simd_fma(a, b, c);
        let sub = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, c) }
}
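// fnmadd computes `-(a * b) + c` per lane; negating `a` before the fma, as
// above, produces the same result. A sketch with illustrative values
// (assumes AVX-512F support has been confirmed at runtime):
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     let r = _mm512_fnmadd_ps(a, b, c);
//     // every lane: -(2.0 * 3.0) + 1.0 = -5.0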

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
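// fnmsub computes `-(a * b) - c` per lane, negating both the product and `c`
// relative to a plain fma. A sketch with illustrative values:
//
//     let a = _mm512_set1_ps(2.0);
//     let b = _mm512_set1_ps(3.0);
//     let c = _mm512_set1_ps(1.0);
//     let r = _mm512_fnmsub_ps(a, b, c);
//     // every lane: -(2.0 * 3.0) - 1.0 = -7.0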
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
}
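
// NOTE (illustrative sketch, not part of the original source): the three mask
// flavours above differ only in which input survives in lanes whose mask bit
// is clear. Assuming a caller that has already verified AVX-512F/VL support,
// the per-lane behaviour for a = 2.0, b = 3.0, c = 1.0 (fnmsub = -(a*b) - c
// = -7.0) would be:
//
//     let (a, b, c) = unsafe { (_mm_set1_pd(2.0), _mm_set1_pd(3.0), _mm_set1_pd(1.0)) };
//     let r = unsafe { _mm_mask_fnmsub_pd(a, 0b01, b, c) };  // lane0 = -7.0, lane1 = a = 2.0
//     let r = unsafe { _mm_maskz_fnmsub_pd(0b01, a, b, c) }; // lane0 = -7.0, lane1 = 0.0
//     let r = unsafe { _mm_mask3_fnmsub_pd(a, b, c, 0b01) }; // lane0 = -7.0, lane1 = c = 1.0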

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}
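
// NOTE (illustrative sketch, not from the original source): a minimal usage
// pattern for the rcp14 family from std code, gated on runtime detection:
//
//     if is_x86_feature_detected!("avx512f") {
//         let a = unsafe { _mm512_set1_ps(4.0) };
//         // Each lane approximates 1.0 / 4.0 = 0.25, with relative error
//         // below 2^-14.
//         let approx = unsafe { _mm512_rcp14_ps(a) };
//     }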

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
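
// NOTE (general numeric technique, not part of this module): when more than
// ~14 correct bits are needed, the rcp14 estimate is commonly refined with
// one Newton-Raphson step, x' = x * (2 - a * x), which roughly doubles the
// number of accurate bits. A sketch for the 512-bit double case, using an
// FMA to evaluate 2 - a*x:
//
//     let x = unsafe { _mm512_rcp14_pd(a) };
//     let x = unsafe { _mm512_mul_pd(x, _mm512_fnmadd_pd(a, x, _mm512_set1_pd(2.0))) };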

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
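
// NOTE (general numeric technique, not part of this module): like rcp14, an
// rsqrt14 estimate can be refined with one Newton-Raphson step,
// x' = 0.5 * x * (3 - a * x * x), roughly doubling the accurate bits. A
// sketch for the 512-bit single-precision case:
//
//     let x = unsafe { _mm512_rsqrt14_ps(a) };
//     let x = unsafe {
//         // 3 - a*x*x, evaluated as fnmadd(a*x, x, 3), then scaled by 0.5*x.
//         let t = _mm512_fnmadd_ps(_mm512_mul_ps(a, x), x, _mm512_set1_ps(3.0));
//         _mm512_mul_ps(_mm512_mul_ps(_mm512_set1_ps(0.5), x), t)
//     };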

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_getexp_ps(a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
}
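
// NOTE (illustrative sketch, not from the original source): getexp extracts
// the unbiased exponent as a float, i.e. floor(log2(|x|)). The same per-lane
// behaviour applies to the _ps variants above:
//
//     unsafe {
//         let e = _mm512_getexp_pd(_mm512_set1_pd(32.0)); // each lane = 5.0
//         let e = _mm512_getexp_pd(_mm512_set1_pd(5.0));  // each lane = 2.0 (floor(log2 5))
//         let e = _mm512_getexp_pd(_mm512_set1_pd(0.5));  // each lane = -1.0
//     }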

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let r = vrndscaleps(
            a,
            IMM8,
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
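
// NOTE (illustrative sketch, not from the original source): for roundscale,
// the high nibble imm8[7:4] selects M, the number of binary fraction bits to
// keep, and the low bits select the rounding mode from the list above; the
// result is 2^-M * round(2^M * x). For example:
//
//     // IMM8 = 0x00: M = 0, round to nearest -> plain round-to-integer.
//     let r = unsafe { _mm512_roundscale_ps::<0x00>(_mm512_set1_ps(2.7)) }; // ~3.0
//     // IMM8 = 0x10: M = 1, keep one fraction bit -> round to nearest 0.5.
//     let r = unsafe { _mm512_roundscale_ps::<0x10>(_mm512_set1_ps(2.7)) }; // ~2.5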

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_f32x8();
        let r = vrndscaleps256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_f32x4();
        let r = vrndscaleps128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
        transmute(r)
    }
}
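
// NOTE (illustrative sketch, not from the original source): the mask/maskz
// roundscale variants compose like the other masked intrinsics in this
// module, e.g. rounding only the even lanes and keeping `src` elsewhere:
//
//     let r = unsafe { _mm512_mask_roundscale_ps::<0x00>(src, 0b0101_0101_0101_0101, a) };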
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let src = src.as_f64x4();
        let r = vrndscalepd256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let src = src.as_f64x2();
        let r = vrndscalepd128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
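///
/// # Example
///
/// A minimal sketch (not part of Intel's documentation): each result element
/// is `a * 2^floor(b)`, i.e. `floor(b)` is added to the exponent of `a`.
/// Assumes `avx512f` is enabled:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512 {
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(-1.5); // floor(-1.5) = -2
///     // Every lane becomes 3.0 * 2^-2 = 0.75.
///     _mm512_scalef_ps(a, b)
/// }
/// ```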
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        transmute(vscalefps256(
            a.as_f32x8(),
            b.as_f32x8(),
            f32x8::ZERO,
            0b11111111,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vscalefps128(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b00001111,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        transmute(vscalefpd256(
            a.as_f64x4(),
            b.as_f64x4(),
            f64x4::ZERO,
            0b00001111,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefpd128(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b00000011,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
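///
/// # Example
///
/// A heavily simplified sketch (not part of Intel's documentation). Each
/// element of `b` is classified into one of eight tokens (QNaN, SNaN, zero,
/// one, -Inf, +Inf, negative, positive); the token indexes a 4-bit nibble in
/// the corresponding element of `c`, and that nibble selects the replacement
/// value, with response `0` meaning "keep `a`". Assuming the usual encoding
/// in which nibble 2 handles zero inputs and response `10` produces `+1.0`:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn zeros_to_one(a: __m512) -> __m512 {
///     // Nibble 2 (ZERO token) = 10 (+1.0); every other nibble = 0 (keep a).
///     let table = _mm512_set1_epi32(0x0000_0A00);
///     // IMM8 = 0 requests no additional exception reporting.
///     _mm512_fixupimm_ps::<0>(a, a, table)
/// }
/// ```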
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
    a: __m256,
    k: __mmask8,
    b: __m256,
    c: __m256i,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmps256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask8,
    a: __m256,
    b: __m256,
    c: __m256i,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmpsz256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmps128(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmpsz128(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let c = c.as_i64x4();
        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
    a: __m256d,
    k: __mmask8,
    b: __m256d,
    c: __m256i,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let c = c.as_i64x4();
        let r = vfixupimmpd256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m256d,
    c: __m256i,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let c = c.as_i64x4();
        let r = vfixupimmpdz256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmpd128(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmpdz128(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
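///
/// # Example
///
/// A minimal sketch (not part of Intel's documentation): to derive `imm8`,
/// evaluate the desired boolean function on all eight combinations of input
/// bits and place each result at bit position `a<<2 | b<<1 | c`. For
/// three-way XOR this truth table yields `0x96`:
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn xor3(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
///     // A single vpternlogd computes a ^ b ^ c for every bit.
///     _mm512_ternarylogic_epi32::<0x96>(a, b, c)
/// }
/// ```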
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let c = c.as_i64x8();
        let r = vpternlogq(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x8();
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r = vpternlogq(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let c = c.as_i64x8();
        let r = vpternlogq(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x4();
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r = vpternlogq256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x2();
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let r = vpternlogq128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
    c: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
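///
/// # Examples
///
/// An illustrative sketch (the values are chosen for this example): with
/// `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_src`, an input of `24.0 = 1.5 * 2^4`
/// is reduced to its mantissa `1.5`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_ps(24.0);
///             let r = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
///             assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(1.5)), 0xFFFF);
///         }
///     }
/// }
/// ```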
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let zero = f32x16::ZERO;
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            zero,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm256_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256,
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let src = src.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm256_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
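///
/// # Examples
///
/// An illustrative sketch (the values are chosen for this example): with
/// `_MM_MANT_NORM_1_2` the mantissa lands in `[1, 2)`, and
/// `_MM_MANT_SIGN_zero` discards the input sign.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_pd(-24.0);
///             let r = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero>(a);
///             assert_eq!(_mm512_cmpeq_pd_mask(r, _mm512_set1_pd(1.5)), 0xFF);
///         }
///     }
/// }
/// ```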
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let zero = f64x8::ZERO;
        let r = vgetmantpd(
            a,
            SIGN << 2 | NORM,
            zero,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x8();
        let r = vgetmantpd(
            a,
            SIGN << 2 | NORM,
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm256_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let src = src.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm256_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x4();
        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_maskz_getmant_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
        transmute(r)
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
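///
/// # Examples
///
/// An illustrative sketch of the rounding control (the values are chosen for
/// this example): with round-toward-negative-infinity, `1.0 + 2^-25` rounds
/// back down to `1.0`, while rounding up yields the next representable `f32`
/// above `1.0`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(f32::EPSILON / 4.0); // 2^-25
///             let down = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///             let up = _mm512_add_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///             assert_eq!(_mm512_cmpeq_ps_mask(down, _mm512_set1_ps(1.0)), 0xFFFF);
///             assert_eq!(_mm512_cmpeq_ps_mask(up, _mm512_set1_ps(1.0 + f32::EPSILON)), 0xFFFF);
///         }
///     }
/// }
/// ```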
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(r)
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
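///
/// # Examples
///
/// An illustrative sketch (the values are chosen for this example):
/// subtracting `2^-26` from `1.0` rounds back up to `1.0` under
/// round-toward-positive-infinity, because the exact difference lies between
/// `1.0` and the next representable `f32` below it.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(f32::EPSILON / 8.0); // 2^-26
///             let up = _mm512_sub_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///             assert_eq!(_mm512_cmpeq_ps_mask(up, _mm512_set1_ps(1.0)), 0xFFFF);
///         }
///     }
/// }
/// ```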
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828    unsafe {
7829        static_assert_rounding!(ROUNDING);
7830        let a = a.as_f32x16();
7831        let b = b.as_f32x16();
7832        let r = vsubps(a, b, ROUNDING);
7833        transmute(r)
7834    }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853    src: __m512,
7854    k: __mmask16,
7855    a: __m512,
7856    b: __m512,
7857) -> __m512 {
7858    unsafe {
7859        static_assert_rounding!(ROUNDING);
7860        let a = a.as_f32x16();
7861        let b = b.as_f32x16();
7862        let r = vsubps(a, b, ROUNDING);
7863        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864    }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883    k: __mmask16,
7884    a: __m512,
7885    b: __m512,
7886) -> __m512 {
7887    unsafe {
7888        static_assert_rounding!(ROUNDING);
7889        let a = a.as_f32x16();
7890        let b = b.as_f32x16();
7891        let r = vsubps(a, b, ROUNDING);
7892        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893    }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
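///
/// # Examples
///
/// A sketch of selecting the truncating mode at compile time (hypothetical
/// wrapper; assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn mul_truncated(a: __m512, b: __m512) -> __m512 {
///     // Truncate each product toward zero, suppressing exceptions.
///     _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```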
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(r)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
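///
/// # Examples
///
/// A sketch of deferring to the runtime rounding mode (hypothetical wrapper;
/// assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn mul_with_mxcsr(a: __m512d, b: __m512d) -> __m512d {
///     // Use whatever rounding mode MXCSR.RC currently selects.
///     _mm512_mul_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b)
/// }
/// ```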
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3936)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
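///
/// # Examples
///
/// A sketch of a directed-rounding division (hypothetical wrapper; assumes
/// `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn div_rounding_down(a: __m512, b: __m512) -> __m512 {
///     // Round each quotient toward negative infinity.
///     _mm512_div_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```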
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(r)
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
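///
/// # Examples
///
/// A sketch of the upward-rounding mode (hypothetical wrapper; assumes
/// `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn div_rounding_up(a: __m512d, b: __m512d) -> __m512d {
///     // Round each quotient toward positive infinity.
///     _mm512_div_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```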
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
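///
/// # Examples
///
/// A minimal usage sketch (hypothetical wrapper; assumes `avx512f` is
/// available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn sqrt_to_nearest(a: __m512) -> __m512 {
///     // Round each square root to nearest (ties to even).
///     _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```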
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(r)
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
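///
/// # Examples
///
/// A sketch of deferring to the MXCSR rounding mode (hypothetical wrapper;
/// assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn sqrt_with_mxcsr(a: __m512d) -> __m512d {
///     // Use the rounding mode currently set via _MM_SET_ROUNDING_MODE.
///     _mm512_sqrt_round_pd::<_MM_FROUND_CUR_DIRECTION>(a)
/// }
/// ```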
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(r)
    }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
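///
/// # Examples
///
/// A sketch showing that the fused operation rounds only once (hypothetical
/// wrapper; assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fma_to_nearest(a: __m512, b: __m512, c: __m512) -> __m512 {
///     // a * b + c with a single rounding of the infinitely precise result.
///     _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```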
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, c, ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
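///
/// # Examples
///
/// A minimal usage sketch (hypothetical wrapper; assumes `avx512f` is
/// available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fma_to_nearest(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
///     // a * b + c, rounded once to nearest (ties to even).
///     _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```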
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
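///
/// # Examples
///
/// A minimal usage sketch (hypothetical wrapper; assumes `avx512f` is
/// available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fms_truncated(a: __m512, b: __m512, c: __m512) -> __m512 {
///     // a * b - c, fused, with the result truncated toward zero.
///     _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```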
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
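///
/// # Examples
///
/// A minimal usage sketch (hypothetical wrapper; assumes `avx512f` is
/// available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fms_to_nearest(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
///     // a * b - c, rounded once to nearest (ties to even).
///     _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```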
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
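///
/// # Examples
///
/// A sketch of the alternating add/subtract pattern (hypothetical wrapper;
/// assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fmaddsub_to_nearest(a: __m512, b: __m512, c: __m512) -> __m512 {
///     // Even-indexed lanes compute a * b - c; odd-indexed lanes a * b + c.
///     _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```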
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, c, ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
8988
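// Illustrative scalar reference (an exposition aid, not the implementation):
// `fmaddsub` subtracts `c` in even-indexed lanes and adds `c` in odd-indexed
// lanes. Unlike this two-step sketch, the intrinsic fuses the multiply and
// the add/subtract with a single rounding controlled by `ROUNDING`.
#[allow(dead_code)]
fn fmaddsub_ps_reference(a: [f32; 16], b: [f32; 16], c: [f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        // Even lanes: a*b - c; odd lanes: a*b + c.
        dst[i] = if i % 2 == 0 { a[i] * b[i] - c[i] } else { a[i] * b[i] + c[i] };
    }
    dst
}
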
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
    }
}

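// Usage sketch (hypothetical helper): the three masked variants differ only
// in where masked-off lanes come from — `mask` copies from `a`, `maskz`
// zeroes, and `mask3` copies from `c`. All three are shown here with
// truncation rounding and exceptions suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fmaddsub_round_pd_variants(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> (__m512d, __m512d, __m512d) {
    const R: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
    (
        _mm512_mask_fmaddsub_round_pd::<R>(a, k, b, c),
        _mm512_maskz_fmaddsub_round_pd::<R>(k, a, b, c),
        _mm512_mask3_fmaddsub_round_pd::<R>(a, b, c, k),
    )
}
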
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

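// Identity sketch (exposition only, hypothetical helper): `fmsubadd(a, b, c)`
// is `fmaddsub(a, b, -c)`, which is exactly how the bodies above lower it via
// `simd_neg(c)` — even lanes become `a*b + c` and odd lanes `a*b - c`. The
// negation below uses a subtraction from zero, which differs from a true IEEE
// negation only in the sign of zero lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fmsubadd_via_fmaddsub(a: __m512d, b: __m512d, c: __m512d) -> (__m512d, __m512d) {
    const R: i32 = _MM_FROUND_CUR_DIRECTION;
    let neg_c = _mm512_sub_pd(_mm512_setzero_pd(), c);
    (
        _mm512_fmsubadd_round_pd::<R>(a, b, c),
        _mm512_fmaddsub_round_pd::<R>(a, b, neg_c),
    )
}
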
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

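// Usage sketch (hypothetical helper): `fnmadd` computes `-(a * b) + c`, i.e.
// `c - a*b` with a single fused rounding; the bodies above negate `a` and
// reuse the fused-multiply-add primitive. `_MM_FROUND_CUR_DIRECTION` defers
// to the rounding mode currently set in `MXCSR.RC`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    _mm512_fnmadd_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
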
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

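// Usage sketch (hypothetical helper): `fnmsub` computes `-(a * b) - c`, the
// full negation of `fmadd`; the bodies above negate both `a` and `c`. Here
// masked-off lanes are zeroed and rounding is directed toward negative
// infinity with exceptions suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_maskz_fnmsub_round_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(k, a, b, c)
}
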
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

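// Usage sketch (hypothetical helper): unlike the fused-multiply intrinsics,
// min/max take only an `SAE` parameter — the comparison itself never rounds,
// so the only choice is whether to suppress exceptions. Note that
// `vmaxps`/`vmaxpd` follow the x86 convention of returning the second operand
// when either input is NaN, rather than IEEE `maxNum` semantics.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_max_round_ps(a: __m512, b: __m512) -> __m512 {
    _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b)
}
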
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
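
// Editorial sketch (hypothetical helper, assuming AVX512F) of the mask
// semantics shared by the `_mask`/`_maskz` variants: lanes whose mask bit is
// clear keep `src` under a writemask and become 0.0 under a zeromask.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn masked_min_round_ps_example() {
    let src = _mm512_set1_ps(9.0);
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    let k: __mmask16 = 0b00000000_11111111; // compute only the low 8 lanes
    let m = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b);
    let z = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(k, a, b);
    let mut mo = [0.0f32; 16];
    let mut zo = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(mo.as_mut_ptr(), m) };
    unsafe { _mm512_storeu_ps(zo.as_mut_ptr(), z) };
    assert_eq!(mo[8..], [9.0f32; 8]); // copied from src
    assert_eq!(zo[8..], [0.0f32; 8]); // zeroed out
}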

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
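
// Editorial sketch (hypothetical helper, assuming AVX512F): `vgetexpps`
// returns the unbiased exponent as a float; 8.0 == 1.0 * 2^3, so getexp
// yields 3.0, i.e. floor(log2(8.0)).
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn getexp_round_ps_example() {
    let a = _mm512_set1_ps(8.0);
    let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out, [3.0f32; 16]);
}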

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetexpps(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetexppd(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
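
// Editorial sketch (hypothetical helper, assuming AVX512F): IMM8[7:4] is the
// number of fraction bits to keep and IMM8[2:0] is the rounding mode, so
// IMM8 = 0x10 rounds to the nearest multiple of 0.5 and 1.3 becomes 1.5.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn roundscale_round_ps_example() {
    let a = _mm512_set1_ps(1.3);
    let r = _mm512_roundscale_round_ps::<0x10, _MM_FROUND_CUR_DIRECTION>(a);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out, [1.5f32; 16]);
}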

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
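
// Editorial sketch (hypothetical helper, assuming AVX512F): `vscalefps`
// computes a * 2^floor(b) per lane, so scalef(3.0, 2.0) == 12.0. The
// rounding constant must pair a mode with _MM_FROUND_NO_EXC, or be
// _MM_FROUND_CUR_DIRECTION.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn scalef_round_ps_example() {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(2.0);
    let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out, [12.0f32; 16]);
}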

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let src = src.as_f32x16();
        let r = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let src = src.as_f64x8();
        let r = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
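
// Editorial sketch (hypothetical helper, assuming AVX512F). Each element of
// `c` is an eight-nibble table indexed by the input class of `b`; response 8
// means "+0.0" and response 0 means "keep the element from `a`". With the
// QNaN nibble set to 8, NaN lanes are fixed up to +0.0.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn fixupimm_round_ps_example() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(f32::NAN);
    let c = _mm512_set1_epi32(0x0000_0008);
    // IMM8 = 0: no extra exception-flag reporting is requested.
    let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out, [0.0f32; 16]);
}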

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 selects which input classes report exception flags.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let c = c.as_i64x8();
        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}
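
// Editorial sketch (hypothetical helper, assuming AVX512F): with the [1, 2)
// normalization interval and the source sign, 10.0 == 1.25 * 2^3 produces a
// mantissa of 1.25.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn getmant_round_ps_example() {
    let a = _mm512_set1_ps(10.0);
    let r =
        _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
            a,
        );
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out, [1.25f32; 16]);
}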

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
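
// Editorial sketch (hypothetical helper, assuming AVX512F): the conversion
// follows the current MXCSR rounding mode, round-to-nearest-even by default,
// so the ties 1.5 and 2.5 both land on the even integer 2.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn cvtps_epi32_example() {
    let a = _mm512_set_ps(
        1.5, 2.5, -1.5, 0.25, 1.5, 2.5, -1.5, 0.25, 1.5, 2.5, -1.5, 0.25, 1.5, 2.5, -1.5, 0.25,
    );
    let r = _mm512_cvtps_epi32(a);
    let mut out = [0i32; 16];
    unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
    // `_mm512_set_ps` lists lanes from high to low.
    assert_eq!(out[..4], [0, -2, 2, 2]);
}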

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe {
        let convert = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe {
        let convert = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        let convert = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        let convert = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
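
// Editorial sketch (hypothetical helper, assuming AVX512F): unlike the signed
// conversion, `vcvtps2udq` can represent values above i32::MAX, such as
// 3_000_000_000.0 (exactly representable in f32).
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn cvtps_epu32_example() {
    let a = _mm512_set1_ps(3_000_000_000.0);
    let r = _mm512_cvtps_epu32(a);
    let mut out = [0u32; 16];
    unsafe { _mm512_storeu_si512(out.as_mut_ptr().cast(), r) };
    assert_eq!(out, [3_000_000_000u32; 16]);
}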

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
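///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// eight `f32` lanes widen losslessly to eight `f64` lanes.
///
/// ```ignore
/// let r: __m512d = _mm512_cvtps_pd(_mm256_set1_ps(1.5));
/// ```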
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
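///
/// # Examples
///
/// A sketch (illustrative only, assuming `avx512f` is available): only the
/// lower eight `f32` lanes of the 512-bit input participate in the
/// conversion.
///
/// ```ignore
/// // The upper eight lanes of `v2` are ignored.
/// let r: __m512d = _mm512_cvtpslo_pd(_mm512_set1_ps(0.5));
/// ```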
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
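///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available): the
/// narrowing conversion rounds according to the current rounding mode.
///
/// ```ignore
/// // Eight f64 lanes narrow to eight f32 lanes.
/// let r: __m256 = _mm512_cvtpd_ps(_mm512_set1_pd(1.25));
/// ```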
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            src.as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
    unsafe {
        let convert = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
    unsafe {
        let convert = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
    unsafe {
        let convert = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
    unsafe {
        let convert = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
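///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// with the default round-to-nearest-even mode, ties round to the even
/// integer.
///
/// ```ignore
/// // -2.5 is midway between -2 and -3, so it rounds to the even value -2.
/// let r: __m256i = _mm512_cvtpd_epi32(_mm512_set1_pd(-2.5));
/// ```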
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe {
        let convert = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe {
        let convert = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe {
        let convert = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe {
        let convert = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
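///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available);
/// inputs are expected to be representable as unsigned 32-bit integers.
///
/// ```ignore
/// let r: __m256i = _mm512_cvtpd_epu32(_mm512_set1_pd(3.0));
/// ```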
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            src.as_u32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
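///
/// # Examples
///
/// A sketch (illustrative only, assuming `avx512f` is available): the eight
/// converted `f32` values land in the lower half of the result and the upper
/// half is zeroed.
///
/// ```ignore
/// let r: __m512 = _mm512_cvtpd_pslo(_mm512_set1_pd(4.0));
/// // Lanes 0..8 are 4.0; lanes 8..16 are 0.0.
/// ```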
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
    unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
    unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            _mm512_castps512_ps256(src).as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
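///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// each of the 16 bytes is sign extended to a 32-bit lane.
///
/// ```ignore
/// // -1i8 (0xFF) sign extends to -1i32 in every lane.
/// let r = _mm512_cvtepi8_epi32(_mm_set1_epi8(-1));
/// ```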
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
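///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// only the low 8 bytes of the 128-bit source are used, one per 64-bit lane.
///
/// ```ignore
/// // Each of the eight i64 lanes is -2.
/// let r = _mm512_cvtepi8_epi64(_mm_set1_epi8(-2));
/// ```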
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
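///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// zero extension treats each byte as unsigned.
///
/// ```ignore
/// // 0xFF zero extends to 255 in every 32-bit lane.
/// let r = _mm512_cvtepu8_epi32(_mm_set1_epi8(-1));
/// ```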
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u8x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
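///
/// # Examples
///
/// A minimal sketch (illustrative only, assuming `avx512f` is available):
/// sixteen i16 lanes widen to sixteen i32 lanes with sign extension.
///
/// ```ignore
/// // Each i32 lane is -300.
/// let r = _mm512_cvtepi16_epi32(_mm256_set1_epi16(-300));
/// ```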
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}
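
// Editorial usage sketch (not part of upstream stdarch): zero extension treats
// the 16-bit lanes as unsigned, so an all-ones lane becomes 65535, not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepu16_epi32() {
    let a = _mm256_set1_epi16(-1); // every 16-bit lane is 0xFFFF
    let r = _mm512_cvtepu16_epi32(a);
    let lanes: [i32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0xFFFF; 16]);
}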

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}
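
// Editorial usage sketch (not part of upstream stdarch): the unsigned 16-bit
// lanes widen to 64 bits with zeros in the high bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepu16_epi64() {
    let a = _mm_set1_epi16(-1); // every 16-bit lane is 0xFFFF
    let r = _mm512_cvtepu16_epi64(a);
    let lanes: [i64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0xFFFF; 8]);
}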

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}
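
// Editorial usage sketch (not part of upstream stdarch): all eight 32-bit
// lanes are sign extended to 64 bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32_epi64() {
    let a = _mm256_set_epi32(7, -6, 5, -4, 3, -2, 1, 0);
    let r = _mm512_cvtepi32_epi64(a);
    let lanes: [i64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0, 1, -2, 3, -4, 5, -6, 7]);
}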

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}
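
// Editorial usage sketch (not part of upstream stdarch): an all-ones 32-bit
// lane reads as u32::MAX, so it widens to 4294967295 rather than -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepu32_epi64() {
    let a = _mm256_set1_epi32(-1); // every 32-bit lane is 0xFFFF_FFFF
    let r = _mm512_cvtepu32_epi64(a);
    let lanes: [i64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0xFFFF_FFFF; 8]);
}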

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}
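
// Editorial usage sketch (not part of upstream stdarch): signed 32-bit lanes
// convert to the nearest f32; small magnitudes convert exactly.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32_ps() {
    let a = _mm512_set1_epi32(-42);
    let r = _mm512_cvtepi32_ps(a);
    let lanes: [f32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [-42.0; 16]);
}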

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}
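
// Editorial usage sketch (not part of upstream stdarch): every i32 is exactly
// representable as f64, so this conversion is lossless.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32_pd() {
    let a = _mm256_set_epi32(7, -6, 5, -4, 3, -2, 1, 0);
    let r = _mm512_cvtepi32_pd(a);
    let lanes: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0]);
}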

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}
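
// Editorial usage sketch (not part of upstream stdarch): the bit pattern
// 0x8000_0000 is i32::MIN when signed but 2^31 when unsigned, which is what
// the unsigned conversion produces.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepu32_ps() {
    let a = _mm512_set1_epi32(i32::MIN);
    let r = _mm512_cvtepu32_ps(a);
    let lanes: [f32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [2147483648.0; 16]);
}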

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
    unsafe {
        let a = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a = a.as_u32x4();
        // Only the two low 32-bit lanes participate in the conversion.
        let lo: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(lo))
    }
}
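
// Editorial usage sketch (not part of upstream stdarch): only the two low
// lanes of the four-lane input are converted; the upper two are ignored.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sketch_cvtepu32_pd() {
    let a = _mm_set_epi32(9, 8, 2, 1); // lanes [1, 2, 8, 9], low to high
    let r = _mm_cvtepu32_pd(a);
    let lanes: [f64; 2] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [1.0, 2.0]);
}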

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2 = v2.as_i32x16();
        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}
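
// Editorial usage sketch (not part of upstream stdarch): only the low eight
// 32-bit lanes of the 512-bit input feed the eight f64 results.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32lo_pd() {
    let v2 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, -6, 5, -4, 3, -2, 1, 0);
    let r = _mm512_cvtepi32lo_pd(v2);
    let lanes: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0]);
}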

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2 = v2.as_u32x16();
        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
    }
}
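
// Editorial usage sketch (not part of upstream stdarch): down-conversion
// truncates, keeping only the low 16 bits of each 32-bit lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32_epi16() {
    let a = _mm512_set1_epi32(0x0001_2345);
    let r = _mm512_cvtepi32_epi16(a);
    let lanes: [i16; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0x2345; 16]); // the high half of each lane is discarded
}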

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    unsafe {
        let a = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}
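
// Editorial usage sketch (not part of upstream stdarch): truncation to 8 bits
// keeps only the low byte, so 0x1FF comes back as 0xFF, i.e. -1 as i8.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cvtepi32_epi8() {
    let a = _mm512_set1_epi32(0x0000_01FF);
    let r = _mm512_cvtepi32_epi8(a);
    let lanes: [i8; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [-1; 16]);
}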
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
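
// Illustrative sketch (ours, not part of the original source): contrasts the
// writemask and zeromask forms of the 512-bit 32-to-8-bit truncation. Where a
// mask bit is clear, the writemask form keeps the corresponding byte of `src`
// while the zeromask form writes 0. The helper name is hypothetical; it
// assumes the caller has verified AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_mask_vs_maskz_cvtepi32_epi8() {
    unsafe {
        let a = _mm512_set1_epi32(0x0102_03AA); // every element truncates to 0xAA
        let src = _mm_set1_epi8(0x55);
        let k: __mmask16 = 0b1111_1111_0000_0000; // convert only the high 8 elements
        let w: [u8; 16] = mem::transmute(_mm512_mask_cvtepi32_epi8(src, k, a));
        let z: [u8; 16] = mem::transmute(_mm512_maskz_cvtepi32_epi8(k, a));
        assert_eq!(&w[..8], &[0x55; 8]); // masked-off lanes copied from src
        assert_eq!(&z[..8], &[0; 8]); // masked-off lanes zeroed
        assert_eq!(&w[8..], &[0xAA; 8]);
        assert_eq!(&z[8..], &[0xAA; 8]);
    }
}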

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}
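
// Illustrative sketch (ours, not part of the original source): the 64-to-32-bit
// conversion simply drops the upper half of each element, so values outside the
// i32 range wrap rather than saturate (contrast with `_mm512_cvtsepi64_epi32`
// further below). The helper name is hypothetical; it assumes the caller has
// verified AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_cvtepi64_epi32_wraps() {
    unsafe {
        // 0x1_0000_0001 does not fit in 32 bits; truncation keeps the low word.
        let a = _mm512_set1_epi64(0x1_0000_0001);
        let r: [i32; 8] = mem::transmute(_mm512_cvtepi64_epi32(a));
        assert_eq!(r, [1; 8]);
    }
}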

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i64x4();
        transmute::<i32x4, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
    unsafe {
        let a = a.as_i64x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}
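
// Illustrative sketch (ours, not part of the original source): with only two
// source elements, the 128-bit 64-to-8-bit conversion fills just the low two
// bytes of the destination; the remaining fourteen bytes are always zeroed.
// The helper name is hypothetical; it assumes AVX-512F + AVX-512VL support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtepi64_epi8_zero_fill() {
    unsafe {
        let a = _mm_set_epi64x(-2, 0x7FF); // low bytes: 0xFE and 0xFF
        let r: [u8; 16] = mem::transmute(_mm_cvtepi64_epi8(a));
        assert_eq!(r[0], 0xFF); // element 0: 0x7FF truncated
        assert_eq!(r[1], 0xFE); // element 1: -2 truncated
        assert_eq!(&r[2..], &[0; 14]);
    }
}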

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
}
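
// Illustrative sketch (ours, not part of the original source): unlike the
// plain `vpmovdw` truncation earlier in this file, the `vpmovsdw` family
// clamps out-of-range values to the i16 limits. The helper name is
// hypothetical; it assumes the caller has verified AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_cvtsepi32_epi16_saturates() {
    unsafe {
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100_000, -100_000, 42, -42);
        let r: [i16; 16] = mem::transmute(_mm512_cvtsepi32_epi16(a));
        // 100_000 > i16::MAX and -100_000 < i16::MIN, so both are clamped.
        assert_eq!(&r[..4], &[-42, 42, i16::MIN, i16::MAX]);
    }
}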

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
}
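
// Illustrative sketch (ours, not part of the original source): the saturating
// counterpart of the wrap-around example after `_mm512_maskz_cvtepi64_epi32`
// above; 0x1_0000_0001 exceeds i32::MAX, so every lane clamps to i32::MAX
// instead of wrapping to 1. The helper name is hypothetical; it assumes the
// caller has verified AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_cvtsepi64_epi32_saturates() {
    unsafe {
        let a = _mm512_set1_epi64(0x1_0000_0001);
        let r: [i32; 8] = mem::transmute(_mm512_cvtsepi64_epi32(a));
        assert_eq!(r, [i32::MAX; 8]);
    }
}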

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
}
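
// Illustrative sketch (ours, not part of the original source): the `vpmovusdw`
// family treats source elements as unsigned, so anything above u16::MAX clamps
// to 0xFFFF and nothing is sign-extended. The helper name is hypothetical; it
// assumes the caller has verified AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_cvtusepi32_epi16_saturates() {
    unsafe {
        let a = _mm512_set1_epi32(0x0001_0000); // 65_536 > u16::MAX
        let r: [u16; 16] = mem::transmute(_mm512_cvtusepi32_epi16(a));
        assert_eq!(r, [u16::MAX; 16]);
    }
}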

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
14059
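// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. Demonstrates the unsigned-saturating u32 -> u16 narrowing above and
// the zeromask behaviour of the family.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvtusepi32_epi16() {
    let a = _mm512_set1_epi32(70_000); // above u16::MAX = 65_535
    let full: [u16; 16] = transmute(_mm512_cvtusepi32_epi16(a));
    assert_eq!(full, [u16::MAX; 16]); // every lane saturates
    let masked: [u16; 16] = transmute(_mm512_maskz_cvtusepi32_epi16(0b1, a));
    assert_eq!(masked[0], u16::MAX); // selected lane converts
    assert!(masked[1..].iter().all(|&w| w == 0)); // unselected lanes zeroed
}
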
/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
}

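// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. The u32 -> u8 narrowing above clamps to u8::MAX; with sixteen source
// lanes, the 512-bit variant fills the whole 128-bit destination.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvtusepi32_epi8() {
    let a = _mm512_set1_epi32(1_000); // above u8::MAX = 255
    let r: [u8; 16] = transmute(_mm512_cvtusepi32_epi8(a));
    assert_eq!(r, [u8::MAX; 16]);
}
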
/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
}

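// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. Contrasts the plain u64 -> u32 narrowing with the writemask variant,
// where lanes with a clear mask bit are taken from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvtusepi64_epi32() {
    let a = _mm512_set1_epi64(1_i64 << 40); // above u32::MAX
    let r: [u32; 8] = transmute(_mm512_cvtusepi64_epi32(a));
    assert_eq!(r, [u32::MAX; 8]);
    let src = _mm256_set1_epi32(7);
    let r: [u32; 8] = transmute(_mm512_mask_cvtusepi64_epi32(src, 0b0000_1111, a));
    assert_eq!(r, [u32::MAX, u32::MAX, u32::MAX, u32::MAX, 7, 7, 7, 7]);
}
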
/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
}

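// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. With only four source lanes, the 256-bit u64 -> u16 narrowing fills
// the low half of the destination and zeroes the rest.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _editor_example_cvtusepi64_epi16() {
    let a = _mm256_set1_epi64x(100_000); // above u16::MAX
    let r: [u16; 8] = transmute(_mm256_cvtusepi64_epi16(a));
    assert_eq!(r, [u16::MAX, u16::MAX, u16::MAX, u16::MAX, 0, 0, 0, 0]);
}
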
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
}

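// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. An all-ones bit pattern is u64::MAX under the unsigned view, so each
// of the eight converted bytes saturates; dst[127:64] is zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvtusepi64_epi8() {
    let a = _mm512_set1_epi64(-1); // all bits set = u64::MAX unsigned
    let r: [u8; 16] = transmute(_mm512_cvtusepi64_epi8(a));
    assert_eq!(&r[..8], &[u8::MAX; 8]);
    assert_eq!(&r[8..], &[0; 8]);
}
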
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvtps2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}

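// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. Shows the embedded rounding control: the same input converts
// differently under round-to-nearest-even and truncation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundps_epi32() {
    let a = _mm512_set1_ps(1.5);
    let near: [i32; 16] = transmute(_mm512_cvt_roundps_epi32::<
        { _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC },
    >(a));
    let trunc: [i32; 16] =
        transmute(_mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a));
    assert_eq!(near, [2; 16]); // 1.5 rounds to the nearest even integer
    assert_eq!(trunc, [1; 16]); // 1.5 truncates toward zero
}
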
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvtps2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}

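// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. The unsigned conversion honours the same rounding control; rounding
// 2.9 toward negative infinity yields 2 in every lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundps_epu32() {
    let a = _mm512_set1_ps(2.9);
    let r: [u32; 16] =
        transmute(_mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a));
    assert_eq!(r, [2; 16]);
}
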
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let src = src.as_f64x8();
        let r = vcvtps2pd(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

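// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. Widening f32 -> f64 is exact, so only exception suppression (SAE) is
// configurable here; the result matches a plain `as` cast per lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundps_pd() {
    let a = _mm256_set1_ps(0.1);
    let r: [f64; 8] = transmute(_mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a));
    assert_eq!(r, [0.1f32 as f64; 8]);
}
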
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvtpd2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

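// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. The tie value -2.5 separates round-to-nearest-even from round-down.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundpd_epi32() {
    let a = _mm512_set1_pd(-2.5);
    let near: [i32; 8] = transmute(_mm512_cvt_roundpd_epi32::<
        { _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC },
    >(a));
    let down: [i32; 8] =
        transmute(_mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a));
    assert_eq!(near, [-2; 8]); // ties go to the even integer
    assert_eq!(down, [-3; 8]); // round toward negative infinity
}
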
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_u32x8();
        let r = vcvtpd2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

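// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. Round-up versus truncation on the same positive input.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundpd_epu32() {
    let a = _mm512_set1_pd(3.5);
    let up: [u32; 8] =
        transmute(_mm512_cvt_roundpd_epu32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a));
    let trunc: [u32; 8] =
        transmute(_mm512_cvt_roundpd_epu32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a));
    assert_eq!(up, [4; 8]);
    assert_eq!(trunc, [3; 8]);
}
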
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512d,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_f32x8();
        let r = vcvtpd2ps(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

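// NOTE(editor): hypothetical usage sketch, not part of stdarch or its test
// suite. 1/3 is inexact in f32, so the two directed roundings bracket the
// exact value when narrowing f64 -> f32.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _editor_example_cvt_roundpd_ps() {
    let a = _mm512_set1_pd(1.0 / 3.0);
    let down: [f32; 8] =
        transmute(_mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a));
    let up: [f32; 8] =
        transmute(_mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a));
    assert!(down[0] < up[0]); // directed roundings differ on inexact input
}
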
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
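///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes `avx512f` has been
/// detected at runtime):
///
/// ```ignore
/// // All lanes hold u32::MAX (the i32 bit pattern -1); rounding up yields 2^32.
/// let a = _mm512_set1_epi32(-1);
/// let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// ```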
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
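///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection). The result packs sixteen `f16` bit patterns into a `__m256i`:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let halves = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// ```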
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m512,
) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
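///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes `avx512f` and
/// `avx512vl` were detected at runtime):
///
/// ```ignore
/// let src = _mm_setzero_si128();
/// let a = _mm256_set1_ps(2.0);
/// // Convert only the even lanes; odd lanes are copied from `src`.
/// let halves = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_TO_NEAREST_INT>(src, 0b0101_0101, a);
/// ```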
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
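///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection):
///
/// ```ignore
/// let a = _mm512_set1_ps(0.5);
/// // Sixteen f32 lanes become sixteen packed f16 bit patterns.
/// let halves = _mm512_cvtps_ph::<_MM_FROUND_TO_NEAREST_INT>(a);
/// ```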
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
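///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection). The `__m256i` input holds sixteen packed `f16` bit patterns:
///
/// ```ignore
/// let halves = _mm512_cvtps_ph::<_MM_FROUND_TO_NEAREST_INT>(_mm512_set1_ps(3.25));
/// // Widen back to f32 without raising exceptions.
/// let floats = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(halves);
/// ```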
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let src = src.as_f32x16();
        let r = vcvtph2ps(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
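///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection):
///
/// ```ignore
/// let halves = _mm512_cvtps_ph::<_MM_FROUND_TO_NEAREST_INT>(_mm512_set1_ps(1.5));
/// let floats = _mm512_cvtph_ps(halves); // each lane is 1.5f32 again
/// ```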
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
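///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.9);
/// // Truncation: every lane becomes 1; SAE suppresses exception reporting.
/// let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
/// ```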
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvttps2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
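///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection). Note that negative inputs are out of range for an unsigned
/// destination:
///
/// ```ignore
/// let a = _mm512_set1_ps(42.7);
/// let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a); // each lane is 42
/// ```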
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvttps2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
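///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection). Eight f64 lanes narrow to eight i32 lanes in a `__m256i`:
///
/// ```ignore
/// let a = _mm512_set1_pd(-2.9);
/// let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a); // each lane truncates to -2
/// ```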
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
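///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection):
///
/// ```ignore
/// let a = _mm512_set1_ps(-1.7);
/// let r = _mm512_cvttps_epi32(a); // each lane truncates toward zero: -1
/// ```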
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
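///
/// # Example
///
/// A minimal usage sketch (illustrative only; assumes runtime `avx512f`
/// detection):
///
/// ```ignore
/// let a = _mm512_set1_ps(3.99);
/// let r = _mm512_cvttps_epu32(a); // each unsigned lane truncates to 3
/// ```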
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
15982pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15983    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15984}
15985
15986/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15987///
15988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15989#[inline]
15990#[target_feature(enable = "avx512f,avx512vl")]
15991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15992#[cfg_attr(test, assert_instr(vcvttps2udq))]
15993pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15994    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15995}
15996
15997/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15998///
15999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16000#[inline]
16001#[target_feature(enable = "avx512f,avx512vl")]
16002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16003#[cfg_attr(test, assert_instr(vcvttps2udq))]
16004pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
16005    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
16006}
16007
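// Illustrative sketch (added for exposition): truncation rounds toward zero,
// so 3.9 converts to 3 in every unsigned lane. Out-of-range inputs (e.g.
// negative values) produce the instruction's out-of-range result rather than
// behaving like a Rust `as` cast. `demo_cvttps_epu32` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvttps_epu32() -> __m512i {
    let a = _mm512_set1_ps(3.9);
    _mm512_cvttps_epu32(a) // every u32 lane holds 3
}
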
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}

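// Illustrative sketch: `SAE` is a const generic, so the exception behavior is
// fixed at compile time. Passing `_MM_FROUND_NO_EXC` suppresses floating-point
// exceptions during the truncating conversion. `demo_cvtt_roundpd_epu32` is a
// hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvtt_roundpd_epu32(a: __m512d) -> __m256i {
    // All eight mask bits set: convert every lane, exceptions suppressed.
    _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b1111_1111, a)
}
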
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

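// Illustrative sketch: eight f64 lanes truncate into eight i32 lanes, so a
// 512-bit input yields a 256-bit result. `demo_cvttpd_narrowing` is a
// hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvttpd_narrowing() -> __m256i {
    let a = _mm512_set1_pd(-2.7);
    _mm512_cvttpd_epi32(a) // truncation toward zero: every lane holds -2
}
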
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
}

/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm512_setzero_pd() -> __m512d {
    // All-0 is a properly initialized __m512d
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm512_setzero_ps() -> __m512 {
    // All-0 is a properly initialized __m512
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm512_setzero() -> __m512 {
    // All-0 is a properly initialized __m512
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm512_setzero_si512() -> __m512i {
    // All-0 is a properly initialized __m512i
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
pub fn _mm512_setzero_epi32() -> __m512i {
    // All-0 is a properly initialized __m512i
    unsafe { const { mem::zeroed() } }
}

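// Illustrative sketch: the zeroing helpers above differ only in the returned
// vector type; all of them lower to the same register-zeroing idiom.
// `demo_zeroed` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_zeroed() -> (__m512, __m512d, __m512i) {
    (_mm512_setzero_ps(), _mm512_setzero_pd(), _mm512_setzero_si512())
}
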
/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
/// order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    unsafe {
        let r = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
    }
}

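// Illustrative sketch: `setr` takes its arguments in memory order, the mirror
// image of `set`, so the two calls below construct identical vectors (element
// 0 is 0 in both). `demo_set_vs_setr` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_set_vs_setr() -> (__m512i, __m512i) {
    let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    (a, b)
}
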
/// Set packed 8-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_epi8(
    e63: i8,
    e62: i8,
    e61: i8,
    e60: i8,
    e59: i8,
    e58: i8,
    e57: i8,
    e56: i8,
    e55: i8,
    e54: i8,
    e53: i8,
    e52: i8,
    e51: i8,
    e50: i8,
    e49: i8,
    e48: i8,
    e47: i8,
    e46: i8,
    e45: i8,
    e44: i8,
    e43: i8,
    e42: i8,
    e41: i8,
    e40: i8,
    e39: i8,
    e38: i8,
    e37: i8,
    e36: i8,
    e35: i8,
    e34: i8,
    e33: i8,
    e32: i8,
    e31: i8,
    e30: i8,
    e29: i8,
    e28: i8,
    e27: i8,
    e26: i8,
    e25: i8,
    e24: i8,
    e23: i8,
    e22: i8,
    e21: i8,
    e20: i8,
    e19: i8,
    e18: i8,
    e17: i8,
    e16: i8,
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m512i {
    unsafe {
        let r = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
    }
}

/// Set packed 16-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_epi16(
    e31: i16,
    e30: i16,
    e29: i16,
    e28: i16,
    e27: i16,
    e26: i16,
    e25: i16,
    e24: i16,
    e23: i16,
    e22: i16,
    e21: i16,
    e20: i16,
    e19: i16,
    e18: i16,
    e17: i16,
    e16: i16,
    e15: i16,
    e14: i16,
    e13: i16,
    e12: i16,
    e11: i16,
    e10: i16,
    e9: i16,
    e8: i16,
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m512i {
    unsafe {
        let r = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
    }
}

/// Set packed 32-bit integers in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
}

/// Set packed 32-bit integers in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
}

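// Illustrative sketch: the `set4`/`setr4` helpers broadcast a four-element
// pattern across all 512 bits; with the arguments mirrored, the two forms
// agree. `demo_set4` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_set4() -> (__m512i, __m512i) {
    let a = _mm512_set4_epi32(4, 3, 2, 1); // pattern repeated four times
    let b = _mm512_setr4_epi32(1, 2, 3, 4); // same vector, arguments mirrored
    (a, b)
}
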
/// Set packed 64-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Set packed 64-bit integers in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    unsafe {
        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
    }
}

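// Illustrative sketch: as with the 32-bit variants, `set` and `setr` are
// argument-order mirrors, so the two vectors below are identical (element 0
// is 0, element 7 is 7). `demo_set_epi64` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_set_epi64() -> (__m512i, __m512i) {
    let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    (a, b)
}
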
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

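// Illustrative sketch (not upstream code): gathering eight f64 values through
// 32-bit indices. `SCALE` is 8 because each element is 8 bytes, so index `i`
// addresses `data[i]`. Safety requires every index to be in bounds;
// `demo_i32gather_pd` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_i32gather_pd(data: &[f64; 16]) -> __m512d {
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14); // even elements
    _mm512_i32gather_pd::<8>(idx, data.as_ptr())
}
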
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = f32x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
    src: __m256,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f32,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
    static_assert_imm8_scale!(SCALE);
    let zero = f32x16::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
    src: __m512,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const f32,
) -> __m512 {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x16::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
    src: __m512i,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
}

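// Illustrative sketch: a masked gather loads only the lanes whose mask bit is
// set and keeps `src` in the rest, which makes it useful after a predicate
// has selected the lanes of interest. All names are hypothetical; every index
// must stay in bounds for the call to be sound.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_mask_i32gather_epi32(data: &[i32; 64]) -> __m512i {
    let idx = _mm512_setr_epi32(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
    let src = _mm512_set1_epi32(-1); // survives in the unselected lanes
    let k: __mmask16 = 0b1010_1010_1010_1010; // gather the odd lanes only
    _mm512_mask_i32gather_epi32::<4>(src, k, idx, data.as_ptr())
}
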
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zeros = i32x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
    src: __m256i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
}

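// Illustrative sketch: with 64-bit indices and 32-bit elements only eight
// lanes fit, so the result narrows to a `__m256i`. `demo_i64gather_epi32` is
// a hypothetical helper; indices must stay in bounds.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_i64gather_epi32(data: &[i32; 32]) -> __m256i {
    let idx = _mm512_setr_epi64(0, 4, 8, 12, 16, 20, 24, 28);
    _mm512_i64gather_epi32::<4>(idx, data.as_ptr())
}
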
/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    offsets: __m256i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
}

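// Illustrative sketch: an unmasked scatter stores all eight lanes, while the
// masked form stores only the lanes whose mask bit is set. The indices here
// are distinct, so no stores overlap. Names are hypothetical; all indices
// must be in bounds of `out`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_i32scatter_pd(out: &mut [f64; 16], src: __m512d) {
    let idx = _mm256_setr_epi32(1, 3, 5, 7, 9, 11, 13, 15); // odd slots
    // Store every lane of `src` at the odd positions of `out`.
    _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src);
    // Store only lanes 0 and 1 (mask bits 0 and 1 are set).
    _mm512_mask_i32scatter_pd::<8>(out.as_mut_ptr(), 0b0000_0011, idx, src);
}
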
16954/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16955///
16956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16957#[inline]
16958#[target_feature(enable = "avx512f")]
16959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16960#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16961#[rustc_legacy_const_generics(3)]
16962pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16963    slice: *mut f64,
16964    offsets: __m512i,
16965    src: __m512d,
16966) {
16967    static_assert_imm8_scale!(SCALE);
16968    let src = src.as_f64x8();
16969    let neg_one = -1;
16970    let slice = slice as *mut i8;
16971    let offsets = offsets.as_i64x8();
16972    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16973}
16974
16975/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16976///
16977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16978#[inline]
16979#[target_feature(enable = "avx512f")]
16980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16981#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16982#[rustc_legacy_const_generics(4)]
16983pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16984    slice: *mut f64,
16985    mask: __mmask8,
16986    offsets: __m512i,
16987    src: __m512d,
16988) {
16989    static_assert_imm8_scale!(SCALE);
16990    let src = src.as_f64x8();
16991    let slice = slice as *mut i8;
16992    let offsets = offsets.as_i64x8();
16993    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16994}
16995
16996/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16997///
16998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16999#[inline]
17000#[target_feature(enable = "avx512f")]
17001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17002#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17003#[rustc_legacy_const_generics(3)]
17004pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17005    slice: *mut f32,
17006    offsets: __m512i,
17007    src: __m512,
17008) {
17009    static_assert_imm8_scale!(SCALE);
17010    let src = src.as_f32x16();
17011    let neg_one = -1;
17012    let slice = slice as *mut i8;
17013    let offsets = offsets.as_i32x16();
17014    vscatterdps(slice, neg_one, offsets, src, SCALE);
17015}
17016
17017/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17018///
17019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vscatterdps(slice, mask as i16, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, mask as i8, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
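///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let mut dst = [0i64; 8];
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     let vals = _mm512_set1_epi64(7);
///     // SCALE = 8: each 32-bit index is multiplied by the byte width of an `i64`.
///     _mm512_i32scatter_epi64::<8>(dst.as_mut_ptr(), idx, vals);
///     assert_eq!(dst, [7; 8]);
/// }
/// ```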
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    offsets: __m256i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vpscatterdq(slice, mask, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    mask: __mmask8,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqq(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vpscatterdd(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
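///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let mut dst = [0i32; 8];
///     // Eight 64-bit indices select where the eight 32-bit values land.
///     let idx = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let vals = _mm256_set1_epi32(5);
///     _mm512_i64scatter_epi32::<4>(dst.as_mut_ptr(), idx, vals);
///     assert_eq!(dst, [5; 8]);
/// }
/// ```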
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqd(slice, mask, offsets, src, SCALE);
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
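///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let table = [10i64, 11, 12, 13, 14, 15, 16, 17];
///     // Only the eight indices in the lower 256 bits of `vindex` are used.
///     let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     let r = _mm512_i32logather_epi64::<8>(vindex, table.as_ptr());
///     let mut out = [0i64; 8];
///     _mm512_storeu_epi64(out.as_mut_ptr(), r);
///     assert_eq!(out, [17, 16, 15, 14, 13, 12, 11, 10]);
/// }
/// ```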
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const i64,
) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
    src: __m512i,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const i64,
) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const f64,
) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
    src: __m512d,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const f64,
) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
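///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let mut dst = [0i64; 8];
///     // Only the eight indices in the lower 256 bits of `vindex` are used.
///     let vindex = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
///     let a = _mm512_setr_epi64(0, 10, 20, 30, 40, 50, 60, 70);
///     _mm512_i32loscatter_epi64::<8>(dst.as_mut_ptr(), vindex, a);
///     assert_eq!(dst, [0, 10, 20, 30, 40, 50, 60, 70]);
/// }
/// ```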
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
/// (elements whose corresponding mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
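///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` and `avx512vl` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let mut dst = [0i32; 8];
///     let vindex = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
///     let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     // Element `i` of `a` is stored to `dst[vindex[i]]`, reversing the slice.
///     _mm256_i32scatter_epi32::<4>(dst.as_mut_ptr(), vindex, a);
///     assert_eq!(dst, [7, 6, 5, 4, 3, 2, 1, 0]);
/// }
/// ```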
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    offsets: __m128i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x4();
    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
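///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` and `avx512vl` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let table = [10i32, 11, 12, 13, 14, 15, 16, 17];
///     let src = _mm256_set1_epi32(-1);
///     let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     // Lanes whose mask bit is clear keep their value from `src`.
///     let r = _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr());
///     let mut out = [0i32; 8];
///     _mm256_storeu_epi32(out.as_mut_ptr(), r);
///     assert_eq!(out, [10, 11, 12, 13, -1, -1, -1, -1]);
/// }
/// ```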
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f64,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f32,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i64,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f64,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f32,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
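///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` and `avx512vl` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let mut dst = [0.0f32; 4];
///     let vindex = _mm_setr_epi32(3, 2, 1, 0);
///     let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
///     // Element `i` of `a` is stored to `dst[vindex[i]]`.
///     _mm_i32scatter_ps::<4>(dst.as_mut_ptr(), vindex, a);
///     assert_eq!(dst, [3.0, 2.0, 1.0, 0.0]);
/// }
/// ```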
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
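///
/// # Examples
///
/// A minimal usage sketch, not taken from the original source; it assumes the
/// caller has already verified `avx512f` and `avx512vl` support at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// unsafe {
///     let table = [1.5f64, 2.5];
///     let src = _mm_set1_pd(-1.0);
///     // Only the two lowest 32-bit indices in `vindex` are used.
///     let vindex = _mm_setr_epi32(1, 0, 0, 0);
///     let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, table.as_ptr());
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), r);
///     // Lane 0 gathers `table[1]`; lane 1 keeps its value from `src`.
///     assert_eq!(out, [2.5, -1.0]);
/// }
/// ```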
18260#[inline]
18261#[target_feature(enable = "avx512f,avx512vl")]
18262#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18263#[rustc_legacy_const_generics(4)]
18264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18265pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18266    src: __m128d,
18267    k: __mmask8,
18268    vindex: __m128i,
18269    base_addr: *const f64,
18270) -> __m128d {
18271    static_assert_imm8_scale!(SCALE);
18272    transmute(vgatherdpd_128(
18273        src.as_f64x2(),
18274        base_addr as _,
18275        vindex.as_i32x4(),
18276        k,
18277        SCALE,
18278    ))
18279}
18280
18281/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18282/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18283/// from src when the corresponding mask bit is not set).
18284///
18285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18286#[inline]
18287#[target_feature(enable = "avx512f,avx512vl")]
18288#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18289#[rustc_legacy_const_generics(4)]
18290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18291pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18292    src: __m128,
18293    k: __mmask8,
18294    vindex: __m128i,
18295    base_addr: *const f32,
18296) -> __m128 {
18297    static_assert_imm8_scale!(SCALE);
18298    transmute(vgatherdps_128(
18299        src.as_f32x4(),
18300        base_addr as _,
18301        vindex.as_i32x4(),
18302        k,
18303        SCALE,
18304    ))
18305}
18306
18307/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18308/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18309/// mask bit is not set).
18310///
18311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
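///
/// # Examples
///
/// A minimal sketch (ours, not from this crate's test suite; marked `ignore`
/// because it needs the `avx512f` and `avx512vl` target features at runtime):
///
/// ```ignore
/// let data: [i32; 4] = [100, 200, 300, 400];
/// let vindex = _mm_set_epi64x(3, 1); // two 64-bit element indices
/// let src = _mm_set1_epi32(0);
/// // SAFETY: SCALE = 4 is the size of i32, so indices 1 and 3 are in bounds.
/// let r = unsafe { _mm_mmask_i64gather_epi32::<4>(src, 0b11, vindex, data.as_ptr()) };
/// // the low two lanes hold data[1] and data[3]; the upper lanes are zeroed.
/// ```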
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f64,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f32,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
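///
/// # Examples
///
/// A minimal sketch (ours, not from this crate's test suite; the `unsafe` block
/// reflects that the caller must have the `avx512f` and `avx512vl` target
/// features enabled):
///
/// ```ignore
/// let a = _mm_set_epi32(4, 3, 2, 1); // lanes: e0 = 1, e1 = 2, e2 = 3, e3 = 4
/// let src = _mm_set1_epi32(-1);
/// let r = unsafe { _mm_mask_compress_epi32(src, 0b1010, a) };
/// // the active lanes 1 and 3 (values 2 and 4) are packed into lanes 0 and 1;
/// // the remaining lanes keep -1 from src.
/// ```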
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
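///
/// # Examples
///
/// A minimal sketch (ours, not from this crate's test suite; marked `ignore`
/// because it needs the `avx512f` and `avx512vl` target features at runtime):
///
/// ```ignore
/// let a = _mm_set_epi32(40, 30, 20, 10); // lanes: e0 = 10 .. e3 = 40
/// let mut out = [0i32; 4];
/// // SAFETY: two mask bits are set, so exactly two i32 values are written,
/// // which fits in `out`.
/// unsafe { _mm_mask_compressstoreu_epi32(out.as_mut_ptr(), 0b0101, a) };
/// // out == [10, 30, 0, 0]
/// ```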
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
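///
/// # Examples
///
/// A minimal sketch (ours, not from this crate's test suite; the `unsafe` block
/// reflects that the caller must have the `avx512f` and `avx512vl` target
/// features enabled):
///
/// ```ignore
/// let a = _mm_set_epi32(0, 0, 20, 10); // only the two lowest elements are used
/// let src = _mm_set1_epi32(-1);
/// let r = unsafe { _mm_mask_expand_epi32(src, 0b1010, a) };
/// // the two lowest elements of a land in lanes 1 and 3; lanes 0 and 2 keep -1.
/// ```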
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
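///
/// # Examples
///
/// A minimal sketch (ours, not from this crate's test suite; the `unsafe` block
/// reflects the required target features):
///
/// ```ignore
/// let a = _mm_set1_epi32(0x8000_0001u32 as i32);
/// let r = unsafe { _mm_rol_epi32::<1>(a) };
/// // each lane rotates its top bit around to the bottom: 0x0000_0003
/// ```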
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rolv_epi32(a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19233#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19234#[rustc_legacy_const_generics(1)]
19235pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19236    static_assert_uimm_bits!(IMM8, 8);
19237    _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8))
19238}
19239
19240/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19241///
19242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19243#[inline]
19244#[target_feature(enable = "avx512f,avx512vl")]
19245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19246#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19247#[rustc_legacy_const_generics(3)]
19248pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19249    static_assert_uimm_bits!(IMM8, 8);
19250    _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
19251}
19252
19253/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19254///
19255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19256#[inline]
19257#[target_feature(enable = "avx512f,avx512vl")]
19258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19259#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19260#[rustc_legacy_const_generics(2)]
19261pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19262    static_assert_uimm_bits!(IMM8, 8);
19263    _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8))
19264}
19265
19266/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19267///
19268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19269#[inline]
19270#[target_feature(enable = "avx512f,avx512vl")]
19271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19272#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19273#[rustc_legacy_const_generics(1)]
19274pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19275    static_assert_uimm_bits!(IMM8, 8);
19276    _mm_rorv_epi32(a, _mm_set1_epi32(IMM8))
19277}
19278
19279/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19280///
19281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19282#[inline]
19283#[target_feature(enable = "avx512f,avx512vl")]
19284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19285#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19286#[rustc_legacy_const_generics(3)]
19287pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19288    static_assert_uimm_bits!(IMM8, 8);
19289    _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8))
19290}
19291
19292/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19293///
19294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19295#[inline]
19296#[target_feature(enable = "avx512f,avx512vl")]
19297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19298#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19299#[rustc_legacy_const_generics(2)]
19300pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19301    static_assert_uimm_bits!(IMM8, 8);
19302    _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8))
19303}
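
// Illustrative sketch (not part of stdarch): rotating right by `n` equals
// rotating left by `32 - n` (mod 32). LLVM canonicalises constant
// right-rotates into left-rotates with the complementary count, which is
// presumably why the `assert_instr` attributes on the `ror` intrinsics above
// expect `vprold` rather than `vprord`.
#[cfg(test)]
mod ror_epi32_reference_sketch {
    #[test]
    fn ror_is_a_complementary_rol() {
        for &x in &[0u32, 1, 0xDEAD_BEEF, u32::MAX] {
            for n in 1..32 {
                assert_eq!(x.rotate_right(n), x.rotate_left(32 - n));
            }
        }
    }
}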

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
}
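
// Illustrative sketch (not part of stdarch): the 64-bit rotates behave exactly
// like their 32-bit counterparts but per `u64` lane; note that the intrinsics
// above widen `IMM8` with `as i64` before splatting it into the count vector.
#[cfg(test)]
mod rol_epi64_reference_sketch {
    #[test]
    fn zeromask_rotate_left_model() {
        let a: [u64; 2] = [1 << 63, 3];
        let k: u8 = 0b01; // only lane 0 survives the zeromask
        let mut dst = [0u64; 2];
        for i in 0..2 {
            let rotated = a[i].rotate_left(4);
            dst[i] = if k & (1 << i) != 0 { rotated } else { 0 };
        }
        assert_eq!(dst, [8, 0]); // the top bit wraps around into bit 3
    }
}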

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
}
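
// Illustrative sketch (not part of stdarch): per Intel's pseudocode the rotate
// count is taken modulo the lane width, and a right-rotate moves the low bits
// into the vacated high bits, as this scalar identity shows.
#[cfg(test)]
mod ror_epi64_reference_sketch {
    #[test]
    fn rotate_right_wraps_low_bits_to_the_top() {
        let x = 0x0123_4567_89AB_CDEF_u64;
        // rotate right by 4: the low nibble wraps into the top nibble
        assert_eq!(x.rotate_right(4), (x >> 4) | (x << 60));
        assert_eq!(x.rotate_right(4), 0xF012_3456_789A_BCDE);
    }
}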

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 32 {
            u32x16::ZERO
        } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x8::ZERO
        } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x4::ZERO
        } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
        }
    }
}
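
// Illustrative sketch (not part of stdarch): unlike Rust's `<<`, which panics
// in debug builds for counts >= 32, the `slli` intrinsics are defined to yield
// zero for any out-of-range immediate; the explicit `IMM8 >= 32` branches
// above mirror that, and `checked_shl` models it concisely.
#[cfg(test)]
mod slli_epi32_reference_sketch {
    #[test]
    fn out_of_range_count_yields_zero() {
        let model = |x: u32, n: u32| x.checked_shl(n).unwrap_or(0);
        assert_eq!(model(0x8000_0001, 1), 2); // the top bit is shifted out
        assert_eq!(model(0x8000_0001, 32), 0); // whole lane zeroed
        assert_eq!(model(0x8000_0001, 255), 0); // any 8-bit immediate >= 32
    }
}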

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 32 {
            u32x16::ZERO
        } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x8::ZERO
        } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x4::ZERO
        } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
        }
    }
}
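
// Illustrative sketch (not part of stdarch): `srli` is a *logical* right
// shift, so the vacated high bits are filled with zeros regardless of the
// sign bit; operating on the lanes as `u32`, as the implementations above do
// via `as_u32x16()` and friends, gives exactly that behaviour.
#[cfg(test)]
mod srli_epi32_reference_sketch {
    #[test]
    fn logical_shift_ignores_the_sign_bit() {
        let lane = -2i32; // 0xFFFF_FFFE
        let logical = ((lane as u32) >> 1) as i32;
        assert_eq!(logical, 0x7FFF_FFFF); // zero-filled, not sign-extended
        assert_ne!(logical, lane >> 1); // an arithmetic shift would give -1
    }
}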

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}
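
// Illustrative sketch (not part of stdarch): the masked 64-bit shifts compose
// the same two steps as their 32-bit counterparts: shift every lane (zeroing
// it when the count is out of range), then let the mask pick between the
// shifted lane and the corresponding `src` lane.
#[cfg(test)]
mod slli_epi64_reference_sketch {
    #[test]
    fn writemask_shift_model() {
        let a: [u64; 2] = [1, 1];
        let src: [u64; 2] = [7, 7];
        let k: u8 = 0b10; // only lane 1 is written
        let mut dst = [0u64; 2];
        for i in 0..2 {
            let shifted = a[i].checked_shl(5).unwrap_or(0);
            dst[i] = if k & (1 << i) != 0 { shifted } else { src[i] };
        }
        assert_eq!(dst, [7, 32]);
    }
}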

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}
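
// Illustrative sketch (not part of stdarch): for the 64-bit variants the
// cutoff is the lane width, 64, rather than 32; everything else is the same
// zero-filling logical shift.
#[cfg(test)]
mod srli_epi64_reference_sketch {
    #[test]
    fn cutoff_is_the_lane_width() {
        let model = |x: u64, n: u32| x.checked_shr(n).unwrap_or(0);
        assert_eq!(model(u64::MAX, 63), 1);
        assert_eq!(model(u64::MAX, 64), 0); // count == lane width zeroes the lane
    }
}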

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
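
// Illustrative sketch (not part of stdarch): unlike `slli`, which takes a
// compile-time immediate, `sll` reads its count at run time from the low
// 64 bits of a `__m128i` and applies that single count to every lane; all
// lanes are zeroed once the count reaches the lane width.
#[cfg(test)]
mod sll_epi32_reference_sketch {
    #[test]
    fn one_runtime_count_for_all_lanes() {
        let a: [u32; 4] = [1, 2, 3, 4];
        let count: u64 = 3; // would sit in the low 64 bits of `count: __m128i`
        let mut dst = [0u32; 4];
        for i in 0..4 {
            dst[i] = if count >= 32 { 0 } else { a[i] << (count as u32) };
        }
        assert_eq!(dst, [8, 16, 24, 32]);
    }
}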
20189
20190/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20196#[cfg_attr(test, assert_instr(vpsrld))]
20197pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20198    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20199}
20200
20201/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20202///
20203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20204#[inline]
20205#[target_feature(enable = "avx512f")]
20206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20207#[cfg_attr(test, assert_instr(vpsrld))]
20208pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20209    unsafe {
20210        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20211        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20212    }
20213}
20214
20215/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20216///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
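
// A hedged usage sketch, not part of the upstream sources: the `srl` family
// applies the single unsigned count held in the low 64 bits of `count` to
// every lane, shifting in zeros (a count of 32 or more clears the lane), and
// the mask/maskz wrappers above blend the result with `src` or with zero.
// `srl_epi32_masking_sketch` is a hypothetical helper; callers must guarantee
// AVX-512F at runtime, e.g. via `is_x86_feature_detected!("avx512f")`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srl_epi32_masking_sketch(src: __m512i) -> __m512i {
    let a = _mm512_set1_epi32(0b1100);
    let count = _mm_set_epi64x(0, 2); // shift every lane right by 2
    // Lanes whose mask bit is set become 0b11; the rest are copied from `src`.
    _mm512_mask_srl_epi32(src, 0b0101_0101_0101_0101, a, count)
}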

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
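
// Hedged sketch (illustrative only, hypothetical helper name): `sll_epi64`
// applies one scalar count to all 64-bit lanes, shifting zeros in from the
// right; the maskz form then zeroes any lane whose mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sll_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(1);
    let count = _mm_set_epi64x(0, 3); // a left shift by 3 multiplies by 8
    // The low four lanes hold 8; the high four lanes are zeroed by the mask.
    _mm512_maskz_sll_epi64(0b0000_1111, a, count)
}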

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
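
// Hedged sketch (illustrative only): a logical right shift never replicates
// the sign bit, so an all-ones lane shifted by 63 yields 1 rather than -1.
// The helper name is hypothetical; AVX-512F must be available at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srl_epi64_sketch(src: __m512i) -> __m512i {
    let a = _mm512_set1_epi64(-1); // all bits set in every lane
    let count = _mm_set_epi64x(0, 63);
    // Odd lanes become 1 (zero-fill shift); even lanes are copied from `src`.
    _mm512_mask_srl_epi64(src, 0b1010_1010, a, count)
}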

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
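
// Hedged sketch (illustrative only): the arithmetic shift replicates the sign
// bit, so shifting by 31 collapses each 32-bit lane to 0 or -1, a common way
// to build a per-lane sign mask. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sra_epi32_sign_mask_sketch(v: __m512i) -> __m512i {
    let count = _mm_set_epi64x(0, 31);
    _mm512_sra_epi32(v, count) // -1 where the lane was negative, else 0
}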

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
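
// Hedged sketch (illustrative only): unlike the 32-bit case, packed 64-bit
// arithmetic right shifts have no SSE/AVX2 counterpart; vpsraq is new with
// AVX-512F, and the 128/256-bit widths additionally require AVX-512VL. The
// helper name below is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sra_epi64_sketch(v: __m256i) -> __m256i {
    let count = _mm_set_epi64x(0, 63);
    _mm256_sra_epi64(v, count) // -1 where the 64-bit lane was negative, else 0
}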

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}
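
// Hedged sketch (illustrative only, hypothetical helper name): the `srai`
// forms fix the count at compile time via a const generic (the legacy imm8
// operand); counts above 31 are clamped, matching the hardware's saturating
// behavior.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srai_epi32_sketch(v: __m512i) -> __m512i {
    // Arithmetic shift right by 2: divides each signed lane by 4,
    // rounding toward negative infinity.
    _mm512_srai_epi32::<2>(v)
}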

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
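
// Hedged sketch (illustrative only, hypothetical helper name): the same
// compile-time-count pattern at 64-bit width, here with the writemask
// variant keeping unselected lanes from `src`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srai_epi64_sketch(src: __m512i, v: __m512i) -> __m512i {
    // Lanes 0..4: v >> 4 (sign-filling); lanes 4..8: copied from `src`.
    _mm512_mask_srai_epi64::<4>(src, 0b0000_1111, v)
}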

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
        simd_shr(a.as_i32x16(), count).as_m512i()
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}
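
// Hedged sketch (illustrative only, hypothetical helper name): the `srav`
// forms take a per-lane count vector rather than one scalar count; counts of
// 32 or more saturate to a shift by 31 (all sign bits), which is what the
// clamping in the implementations above encodes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srav_epi32_sketch(v: __m512i) -> __m512i {
    // Lane i is shifted right by i modulo 4 bits (0, 1, 2, 3, repeating).
    let counts = _mm512_set_epi32(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
    _mm512_srav_epi32(v, counts)
}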

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
        simd_shr(a.as_i64x8(), count).as_m512i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
        simd_shr(a.as_i64x4(), count).as_m256i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
        simd_shr(a.as_i64x2(), count).as_m128i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}
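
// Hedged sketch (illustrative only, hypothetical helper name): per-lane
// arithmetic shifts on 64-bit lanes, with counts of 64 or more saturating
// to a shift by 63 as described above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srav_epi64_sketch(v: __m512i) -> __m512i {
    let counts = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    _mm512_srav_epi64(v, counts) // lane i is shifted right by i bits
}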

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x8(),
            a.as_u32x8(),
            simd_and(b.as_u32x8(), u32x8::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
    }
}
21301
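// A minimal usage sketch for the variable rotate-left intrinsics above (not
// part of this crate's API; assumes the caller has verified CPU support, e.g.
// via `is_x86_feature_detected!("avx512f")` plus `"avx512vl"` for the 128-bit
// form). Counts are reduced modulo the lane width, so 36 behaves like 4:
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     fn rotate_lanes_left(a: __m128i) -> __m128i {
//         let counts = _mm_setr_epi32(1, 2, 3, 36);
//         _mm_rolv_epi32(a, counts)
//     }
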
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u32x8(),
            a.as_u32x8(),
            simd_and(b.as_u32x8(), u32x8::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
    }
}

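// Sketch of the rotate identity (illustrative constants, not crate API):
// because both directions reduce their counts modulo 32, rotating right by
// `n` matches rotating left by `32 - n` in every 32-bit lane:
//
//     let right = _mm512_rorv_epi32(x, _mm512_set1_epi32(7));
//     let left = _mm512_rolv_epi32(x, _mm512_set1_epi32(32 - 7));
//     // `right` and `left` hold identical bits for any `x`.
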
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
    }
}

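// Writemask sketch for the masked 64-bit rotate-left variants above: lanes
// whose mask bit is set receive the rotated value, the rest are copied from
// `src`. The constants are illustrative only:
//
//     let src = _mm512_set1_epi64(-1);
//     let r = _mm512_mask_rolv_epi64(
//         src,
//         0b0000_1111, // lanes 0..=3 become 1 << 8, lanes 4..=7 keep -1
//         _mm512_set1_epi64(1),
//         _mm512_set1_epi64(8),
//     );
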
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
    }
}

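// Zeromask sketch for the `maskz` rotate-right variants above: lanes with a
// clear mask bit are zeroed instead of copied (illustrative constants):
//
//     let a = _mm_set1_epi64x(0x10);
//     let b = _mm_set1_epi64x(4);
//     // k = 0b01: lane 0 becomes 0x1, lane 1 becomes 0.
//     let r = _mm_maskz_rorv_epi64(0b01, a, b);
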
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

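// Unlike the rotates, the variable shifts do not wrap their counts: the
// `no_overflow` select above zeroes any lane whose count is 32 or more.
// Illustrative sketch:
//
//     let ones = _mm512_set1_epi32(1);
//     let counts = _mm512_setr_epi32(0, 1, 31, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
//     // Lanes 0..=2 hold 1, 2 and 1 << 31; lanes 3 and 4 hold 0.
//     let r = _mm512_sllv_epi32(ones, counts);
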
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

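// `srlv` is a logical right shift: zeros are shifted in regardless of the
// sign bit, and, as with `sllv`, a count of 32 or more clears the lane.
// Illustrative sketch:
//
//     let a = _mm_set1_epi32(-1); // all bits set in every lane
//     let counts = _mm_setr_epi32(0, 4, 31, 32);
//     // Lanes become -1, 0x0FFF_FFFF, 1 and 0.
//     let r = _mm_srlv_epi32(a, counts);
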
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

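// The 64-bit forms read the full 64-bit count from each element, so even an
// enormous count simply zeroes the lane rather than wrapping (illustrative):
//
//     let ones = _mm256_set1_epi64x(1);
//     let counts = _mm256_setr_epi64x(0, 63, 64, i64::MAX);
//     // Lanes become 1, 1 << 63, 0 and 0.
//     let r = _mm256_sllv_epi64(ones, counts);
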
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

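// A small practical sketch (hypothetical helper, not part of this crate):
// per-lane bitfield extraction by pairing a variable right shift with a mask.
// The unmasked `_mm_srlv_epi64` itself only requires avx2:
//
//     #[target_feature(enable = "avx2")]
//     fn extract_nibbles(v: __m128i, shifts: __m128i) -> __m128i {
//         let shifted = _mm_srlv_epi64(v, shifts);
//         _mm_and_si128(shifted, _mm_set1_epi64x(0xF))
//     }
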
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

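// How the `MASK` immediate above is consumed: each pair of bits picks one of
// the four elements inside every 128-bit lane, lowest pair first. A sketch
// using the conventional `_MM_SHUFFLE` helper (arguments read from the
// highest bit pair down to the lowest; `v` is illustrative):
//
//     // Within each 128-bit lane, turn [a0, a1, a2, a3] into [a3, a2, a1, a0].
//     let reversed = _mm512_permute_ps::<{ _MM_SHUFFLE(0, 1, 2, 3) }>(v);
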
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                (MASK as u32 >> 1) & 0b1,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

22295/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22296///
22297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22298#[inline]
22299#[target_feature(enable = "avx512f,avx512vl")]
22300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22301#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22302#[rustc_legacy_const_generics(3)]
22303pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22304    unsafe {
22305        static_assert_uimm_bits!(MASK, 4);
22306        let r = _mm256_permute_pd::<MASK>(a);
22307        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22308    }
22309}
22310
22311/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22312///
22313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22314#[inline]
22315#[target_feature(enable = "avx512f,avx512vl")]
22316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22317#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22318#[rustc_legacy_const_generics(2)]
22319pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22320    unsafe {
22321        static_assert_uimm_bits!(MASK, 4);
22322        let r = _mm256_permute_pd::<MASK>(a);
22323        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22324    }
22325}
22326
22327/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22328///
22329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22330#[inline]
22331#[target_feature(enable = "avx512f,avx512vl")]
22332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22333#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22334#[rustc_legacy_const_generics(3)]
22335pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22336    unsafe {
22337        static_assert_uimm_bits!(IMM2, 2);
22338        let r = _mm_permute_pd::<IMM2>(a);
22339        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22340    }
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22346#[inline]
22347#[target_feature(enable = "avx512f,avx512vl")]
22348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22349#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22350#[rustc_legacy_const_generics(2)]
22351pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22352    unsafe {
22353        static_assert_uimm_bits!(IMM2, 2);
22354        let r = _mm_permute_pd::<IMM2>(a);
22355        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22356    }
22357}
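
// Illustrative sketch, not part of the original source: the `mask` variants
// fall back to `src` and the `maskz` variants fall back to zero wherever a
// mask bit is clear. Assuming `avx512f,avx512vl` are enabled:
//
//     let a = _mm_setr_pd(1.0, 2.0);
//     let src = _mm_setr_pd(-1.0, -1.0);
//     // IMM2 = 0b01 produces [a[1], a[0]]; only mask bit 0 is set:
//     let m = _mm_mask_permute_pd::<0b01>(src, 0b01, a); // [2.0, -1.0]
//     let z = _mm_maskz_permute_pd::<0b01>(0b01, a); // [2.0, 0.0]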

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
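
// Illustrative sketch, not part of the original source: MASK is read as four
// 2-bit indices that are applied identically to each 256-bit half, so an
// element can move within its half but never across halves:
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
//     // each 256-bit half is reversed: r == [3, 2, 1, 0, 7, 6, 5, 4]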

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
            ],
        )
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
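
// Illustrative sketch, not part of the original source: the floating-point
// counterpart decodes MASK exactly like `_mm512_permutex_epi64`; for example,
// MASK = 0 broadcasts element 0 of each 256-bit half:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_permutex_pd::<0>(a);
//     // r == [0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0]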

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}
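
// Illustrative sketch, not part of the original source: despite the
// `permutevar` name this shuffles across the whole register, using only the
// low 4 bits of each index element. Note that the index vector is the first
// operand:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_set1_epi32(3);
//     let r = _mm512_permutevar_epi32(idx, a);
//     // every element becomes a[3]: r == [3; 16]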

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_mask_permutevar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
}
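
// Illustrative sketch, not part of the original source: only the low 2 bits
// of each 32-bit element of `b` are used, and they select within the 128-bit
// lane that the element belongs to:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let b = _mm512_set1_epi32(1);
//     let r = _mm512_permutevar_ps(a, b);
//     // each lane broadcasts its element 1:
//     // r == [1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0,
//     //       9.0, 9.0, 9.0, 9.0, 13.0, 13.0, 13.0, 13.0]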

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    unsafe {
        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    unsafe {
        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    unsafe {
        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    unsafe {
        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    unsafe {
        let permute = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    unsafe {
        let permute = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
}
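
// Illustrative sketch, not part of the original source: as with `vpermilpd`,
// the selector for each element is bit 1 (not bit 0) of the corresponding
// 64-bit element of `b`, and selection stays within each 128-bit lane:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let b = _mm512_set1_epi64(0b10);
//     let r = _mm512_permutevar_pd(a, b);
//     // each lane broadcasts its upper element:
//     // r == [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]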

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    unsafe {
        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    unsafe {
        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
    unsafe {
        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
    unsafe {
        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    unsafe {
        let permute = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    unsafe {
        let permute = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}
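
// Illustrative sketch, not part of the original source: a full cross-lane
// shuffle; each result element is a[idx[i] & 0xF]:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // the vector is fully reversed: r == [15, 14, ..., 1, 0]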

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_mask_permutexvar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // LLVM uses llvm.x86.avx2.permd
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm256_mask_permutexvar_epi32(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
}
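
// Illustrative sketch, not part of the original source: the 64-bit variant
// uses only the low 3 bits of each index element:
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi64(idx, a);
//     // r == [7, 6, 5, 4, 3, 2, 1, 0]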

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm512_mask_permutexvar_epi64(
    src: __m512i,
    k: __mmask8,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm256_mask_permutexvar_epi64(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
}
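
// Illustrative sketch, not part of the original source: unlike `permutevar`,
// the `permutexvar` intrinsics take the index vector first and shuffle across
// the entire register, which makes, e.g., a rotate by one element possible:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let idx = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0);
//     let r = _mm512_permutexvar_ps(idx, a);
//     // r == [1.0, 2.0, ..., 15.0, 0.0]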

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
    _mm256_permutevar8x32_ps(a, idx) // LLVM uses llvm.x86.avx2.permps
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
}
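
// Illustrative sketch, not part of the original source: with a splatted index
// this doubles as a runtime-selected broadcast (only the low 3 bits of each
// index element are used):
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_permutexvar_pd(_mm512_set1_epi64(5), a);
//     // r == [5.0; 8]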

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
}
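
// Illustrative sketch, not part of the original source: each index element
// addresses the 32-element concatenation of `a` and `b`; bit 4 selects the
// source (0 = a, 1 = b) and the low 4 bits select the element within it:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let b = _mm512_set1_epi32(-1);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // interleaves the low half of a with b: r == [0, -1, 1, -1, ..., 7, -1]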

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub fn _mm512_mask_permutex2var_epi32(
    a: __m512i,
    k: __mmask16,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub fn _mm512_maskz_permutex2var_epi32(
    k: __mmask16,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub fn _mm512_mask2_permutex2var_epi32(
    a: __m512i,
    idx: __m512i,
    k: __mmask16,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
    }
}
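
// Note (illustrative, not part of the original source): the `mask2` variant
// differs from `mask` only in its fallback operand; elements whose mask bit
// is clear are copied from `idx` rather than from `a`, matching vpermi2d's
// behavior of overwriting the index register:
//
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
//     // an all-clear mask simply yields idx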
23216
23217/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23218///
23219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23220#[inline]
23221#[target_feature(enable = "avx512f,avx512vl")]
23222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23223#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23224pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23225    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23226}
23227
23228/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23229///
23230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23231#[inline]
23232#[target_feature(enable = "avx512f,avx512vl")]
23233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23234#[cfg_attr(test, assert_instr(vpermt2d))]
23235pub fn _mm256_mask_permutex2var_epi32(
23236    a: __m256i,
23237    k: __mmask8,
23238    idx: __m256i,
23239    b: __m256i,
23240) -> __m256i {
23241    unsafe {
23242        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23243        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23244    }
23245}
23246
23247/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23248///
23249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23250#[inline]
23251#[target_feature(enable = "avx512f,avx512vl")]
23252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23253#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23254pub fn _mm256_maskz_permutex2var_epi32(
23255    k: __mmask8,
23256    a: __m256i,
23257    idx: __m256i,
23258    b: __m256i,
23259) -> __m256i {
23260    unsafe {
23261        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23262        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23263    }
23264}
23265
23266/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23267///
23268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23269#[inline]
23270#[target_feature(enable = "avx512f,avx512vl")]
23271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23272#[cfg_attr(test, assert_instr(vpermi2d))]
23273pub fn _mm256_mask2_permutex2var_epi32(
23274    a: __m256i,
23275    idx: __m256i,
23276    k: __mmask8,
23277    b: __m256i,
23278) -> __m256i {
23279    unsafe {
23280        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23281        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23282    }
23283}
23284
23285/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23286///
23287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23288#[inline]
23289#[target_feature(enable = "avx512f,avx512vl")]
23290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23291#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23292pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23293    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23294}
23295
23296/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
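///
/// # Example
///
/// An illustrative sketch of the `mask2` variant, not from Intel's
/// documentation: lanes whose mask bit is clear keep the raw index value
/// from `idx` rather than a value from `a` or `b`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         use std::arch::x86_64::*;
///         // SAFETY: the required target features were just detected.
///         unsafe {
///             let a = _mm_setr_epi32(10, 11, 12, 13);
///             let b = _mm_setr_epi32(20, 21, 22, 23);
///             let idx = _mm_setr_epi32(1, 5, 1, 5);
///             // Lane 3 is masked off, so it keeps the raw index value 5.
///             let r = _mm_mask2_permutex2var_epi32(a, idx, 0b0111, b);
///             let expected = _mm_setr_epi32(11, 21, 11, 5);
///             assert_eq!(_mm_cmpeq_epi32_mask(r, expected), 0b1111);
///         }
///     }
/// }
/// ```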
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
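///
/// # Example
///
/// A minimal illustrative sketch, not taken from Intel's documentation:
/// indices `0..=7` select a lane of `a` and `8..=15` select a lane of `b`,
/// here interleaving the low halves of the two vectors.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///             let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///             let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
///             let r = _mm512_permutex2var_epi64(a, idx, b);
///             let expected = _mm512_setr_epi64(0, 10, 1, 11, 2, 12, 3, 13);
///             assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
///         }
///     }
/// }
/// ```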
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub fn _mm512_mask_permutex2var_epi64(
    a: __m512i,
    k: __mmask8,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm512_maskz_permutex2var_epi64(
    k: __mmask8,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub fn _mm512_mask2_permutex2var_epi64(
    a: __m512i,
    idx: __m512i,
    k: __mmask8,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub fn _mm256_mask_permutex2var_epi64(
    a: __m256i,
    k: __mmask8,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm256_maskz_permutex2var_epi64(
    k: __mmask8,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub fn _mm256_mask2_permutex2var_epi64(
    a: __m256i,
    idx: __m256i,
    k: __mmask8,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
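///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation: indices `0..=15`
/// select a lane of `a`, while indices with bit 4 set (`16..=31`) select a
/// lane of `b`. The result is compared bitwise through an integer cast.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_ps(
///                 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///                 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///             );
///             let b = _mm512_set1_ps(100.0);
///             // Interleave the even lanes of `a` with b[0] (index 16).
///             let idx = _mm512_setr_epi32(0, 16, 2, 16, 4, 16, 6, 16, 8, 16, 10, 16, 12, 16, 14, 16);
///             let r = _mm512_permutex2var_ps(a, idx, b);
///             let expected = _mm512_setr_ps(
///                 0.0, 100.0, 2.0, 100.0, 4.0, 100.0, 6.0, 100.0,
///                 8.0, 100.0, 10.0, 100.0, 12.0, 100.0, 14.0, 100.0,
///             );
///             assert_eq!(
///                 _mm512_cmpeq_epi32_mask(_mm512_castps_si512(r), _mm512_castps_si512(expected)),
///                 0xffff
///             );
///         }
///     }
/// }
/// ```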
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but the compiler emits vpermt2ps
pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        let idx = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but the compiler emits vpermt2ps
pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        let idx = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2ps or vpermt2ps
pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe {
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but the compiler emits vpermt2ps
pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
    unsafe {
        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        let idx = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
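///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation: indices `0..=7`
/// select a lane of `a`, while indices with bit 3 set (`8..=15`) select a
/// lane of `b`. The comparison reinterprets the lanes as integers.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///             let b = _mm512_set1_pd(-1.0);
///             // Odd output lanes pull b[0..=3]; even ones reverse `a`.
///             let idx = _mm512_setr_epi64(7, 8, 5, 9, 3, 10, 1, 11);
///             let r = _mm512_permutex2var_pd(a, idx, b);
///             let expected = _mm512_setr_pd(7.0, -1.0, 5.0, -1.0, 3.0, -1.0, 1.0, -1.0);
///             assert_eq!(
///                 _mm512_cmpeq_epi64_mask(_mm512_castpd_si512(r), _mm512_castpd_si512(expected)),
///                 0xff
///             );
///         }
///     }
/// }
/// ```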
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd, but the compiler emits vpermt2pd
pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        let idx = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd, but the compiler emits vpermt2pd
pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        let idx = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2pd or vpermt2pd
pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe {
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd, but the compiler emits vpermt2pd
pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
    unsafe {
        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        let idx = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
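///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation. The shuffle is
/// performed independently within each of the four 128-bit lanes;
/// `_MM_PERM_AAAA` broadcasts element 0 of every lane.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let r = _mm512_shuffle_epi32::<_MM_PERM_AAAA>(a);
///             let expected =
///                 _mm512_setr_epi32(0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```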
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] // should be vpshufd
#[rustc_legacy_const_generics(1)]
pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r: i32x16 = simd_shuffle!(
            a.as_i32x16(),
            a.as_i32x16(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
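///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation. Within every
/// 128-bit lane the two low results are chosen from `a` and the two high
/// results from `b`, two control bits per element.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_ps(
///                 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///                 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///             );
///             let b = _mm512_set1_ps(-1.0);
///             // 0b01_00_11_10: per lane, picks a[2], a[3], b[0], b[1].
///             let r = _mm512_shuffle_ps::<0b01_00_11_10>(a, b);
///             let expected = _mm512_setr_ps(
///                 2.0, 3.0, -1.0, -1.0, 6.0, 7.0, -1.0, -1.0,
///                 10.0, 11.0, -1.0, -1.0, 14.0, 15.0, -1.0, -1.0,
///             );
///             assert_eq!(
///                 _mm512_cmpeq_epi32_mask(_mm512_castps_si512(r), _mm512_castps_si512(expected)),
///                 0xffff
///             );
///         }
///     }
/// }
/// ```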
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 16,
                ((MASK as u32 >> 6) & 0b11) + 16,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 20,
                ((MASK as u32 >> 6) & 0b11) + 20,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 24,
                ((MASK as u32 >> 6) & 0b11) + 24,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 28,
                ((MASK as u32 >> 6) & 0b11) + 28,
            ],
        )
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shuffle_ps<const MASK: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a and b using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
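///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation. One control bit
/// per element: within each 128-bit lane, the low result comes from `a` and
/// the high result from `b`, each bit selecting element 0 or 1 of its lane.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///             let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
///             // All bits set: take the high element of each lane pair.
///             let r = _mm512_shuffle_pd::<0b1111_1111>(a, b);
///             let expected = _mm512_setr_pd(1.0, 11.0, 3.0, 13.0, 5.0, 15.0, 7.0, 17.0);
///             assert_eq!(
///                 _mm512_cmpeq_epi64_mask(_mm512_castpd_si512(r), _mm512_castpd_si512(expected)),
///                 0xff
///             );
///         }
///     }
/// }
/// ```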
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 8,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 10,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 12,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 14,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_shuffle_pd<const MASK: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
    }
}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
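///
/// # Example
///
/// An illustrative sketch, not from Intel's documentation. The control
/// selects whole 128-bit lanes: the two low output lanes come from `a` and
/// the two high output lanes from `b`, two control bits per lane.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         // SAFETY: the avx512f feature was just detected.
///         unsafe {
///             let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let b = _mm512_set1_epi32(-1);
///             // 0b00_00_11_00: dst = [a lane 0, a lane 3, b lane 0, b lane 0].
///             let r = _mm512_shuffle_i32x4::<0b00_00_11_00>(a, b);
///             let expected =
///                 _mm512_setr_epi32(0, 1, 2, 3, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```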
24285#[inline]
24286#[target_feature(enable = "avx512f")]
24287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24288#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24289#[rustc_legacy_const_generics(2)]
24290pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24291    unsafe {
24292        static_assert_uimm_bits!(MASK, 8);
24293        let a = a.as_i32x16();
24294        let b = b.as_i32x16();
24295        let r: i32x16 = simd_shuffle!(
24296            a,
24297            b,
24298            [
24299                (MASK as u32 & 0b11) * 4 + 0,
24300                (MASK as u32 & 0b11) * 4 + 1,
24301                (MASK as u32 & 0b11) * 4 + 2,
24302                (MASK as u32 & 0b11) * 4 + 3,
24303                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24304                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24305                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24306                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24307                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24308                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24309                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24310                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24311                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24312                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24313                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24314                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24315            ],
24316        );
24317        transmute(r)
24318    }
24319}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}
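
// Illustrative sketch of the shared mask semantics (hypothetical helper, not
// part of this module's API): bit i of `k` selects shuffled element i; when
// clear, the writemask variant falls back to `src` and the zeromask variant
// to zero. Assumes an AVX-512F-capable CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_shuffle_i32x4() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    let src = _mm512_set1_epi32(-1);
    // The lane-identity shuffle 0b11_10_01_00 yields [1; 8] then [2; 8]; only
    // the low eight mask bits are set, so elements 8..16 come from src / zero.
    let r = _mm512_mask_shuffle_i32x4::<0b11_10_01_00>(src, 0x00ff, a, b);
    let z = _mm512_maskz_shuffle_i32x4::<0b11_10_01_00>(0x00ff, a, b);
    let e_r = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
    let e_z = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e_r), 0xffff);
    assert_eq!(_mm512_cmpeq_epi32_mask(z, e_z), 0xffff);
}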

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] // should be vshufi32x4
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r: i64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
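
// Illustrative sketch (hypothetical helper, not part of this module's API):
// with 64-bit elements a 128-bit lane spans elements 2n and 2n+1, and the
// same two-bit MASK fields select the lanes. Assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_i64x2() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // MASK = 0b00_00_11_01 selects a.lane1, a.lane3, b.lane0, b.lane0.
    let r = _mm512_shuffle_i64x2::<0b00_00_11_01>(a, b);
    let e = _mm512_setr_epi64(2, 3, 6, 7, 8, 9, 8, 9);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}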

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshufi64x2
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r: i64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] // should be vshuff32x4, but generates vshuff64x2
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
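
// Illustrative sketch (hypothetical helper, not part of this module's API):
// the float variant selects lanes exactly like _mm512_shuffle_i32x4; only the
// element type differs. Assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let b = _mm512_set1_ps(100.0);
    // MASK = 0b00_00_00_11 selects a.lane3, a.lane0, b.lane0, b.lane0.
    let r = _mm512_shuffle_f32x4::<0b00_00_00_11>(a, b);
    let e = _mm512_setr_ps(
        12.0, 13.0, 14.0, 15.0, 0.0, 1.0, 2.0, 3.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0,
        100.0, 100.0,
    );
    assert_eq!(_mm512_cmpeq_ps_mask(r, e), 0xffff);
}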

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff32x4
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r: f32x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 4 + 0,
                (MASK as u32 & 0b1) * 4 + 1,
                (MASK as u32 & 0b1) * 4 + 2,
                (MASK as u32 & 0b1) * 4 + 3,
                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r: f64x8 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 2 + 0,
                (MASK as u32 & 0b11) * 2 + 1,
                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
            ],
        );
        transmute(r)
    }
}
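
// Illustrative sketch (hypothetical helper, not part of this module's API):
// mixes 128-bit lanes of doubles from both sources. Assumes AVX-512F at
// runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_f64x2() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let b = _mm512_setr_pd(8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0);
    // MASK = 0b01_10_10_01 selects a.lane1, a.lane2, b.lane2, b.lane1.
    let r = _mm512_shuffle_f64x2::<0b01_10_10_01>(a, b);
    let e = _mm512_setr_pd(2.0, 3.0, 4.0, 5.0, 12.0, 13.0, 10.0, 11.0);
    assert_eq!(_mm512_cmpeq_pd_mask(r, e), 0xff);
}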

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff64x2
#[rustc_legacy_const_generics(2)]
pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let r: f64x4 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b1) * 2 + 0,
                (MASK as u32 & 0b1) * 2 + 1,
                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
            ],
        );
        transmute(r)
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
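
// Illustrative sketch (hypothetical helper, not part of this module's API):
// IMM8 counts 128-bit lanes from the bottom, so IMM8 = 2 yields elements
// 8..12. The SSE comparison below assumes the features implied by AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let r = _mm512_extractf32x4_ps::<2>(a);
    let e = _mm_setr_ps(8.0, 9.0, 10.0, 11.0);
    assert_eq!(_mm_movemask_ps(_mm_cmpeq_ps(r, e)), 0b1111);
}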

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM8 = 1) // should be vextractf32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) // should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
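
// Illustrative sketch (hypothetical helper, not part of this module's API):
// IMM1 picks the lower (0) or upper (1) 256-bit half. The AVX2 comparison
// below relies on AVX2 being implied by AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_extracti64x4_epi64() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm512_extracti64x4_epi64::<1>(a);
    let e = _mm256_setr_epi64x(4, 5, 6, 7);
    // All byte lanes equal, so the movemask is all ones.
    assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, e)), -1);
}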

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}
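
// Illustrative sketch (hypothetical helper, not part of this module's API):
// the floating-point analogue of the extraction above. Assumes AVX-512F and
// the AVX features it implies.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_extractf64x4_pd() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let r = _mm512_extractf64x4_pd::<1>(a);
    let e = _mm256_setr_pd(4.0, 5.0, 6.0, 7.0);
    assert_eq!(_mm256_movemask_pd(_mm256_cmp_pd::<_CMP_EQ_OQ>(r, e)), 0b1111);
}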

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m512d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf32x4, IMM2 = 3) // should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let a = a.as_i32x16();
        let zero = i32x16::ZERO;
        let extract: i32x4 = match IMM2 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
        };
        transmute(extract)
    }
}
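
// Illustrative sketch (hypothetical helper, not part of this module's API):
// IMM2 = 3 selects the top 128-bit lane, elements 12..16. The SSE2 compare
// assumes the features implied by AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_extracti32x4_epi32::<3>(a);
    let e = _mm_setr_epi32(12, 13, 14, 15);
    // 16 equal bytes give a movemask of 0xffff.
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xffff);
}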

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextract, IMM1 = 1) // should be vextracti32x4
)]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let a = a.as_i32x8();
        let zero = i32x8::ZERO;
        let extract: i32x4 = match IMM1 {
            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
        };
        transmute(extract)
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM1, 1);
        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
    unsafe {
        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
    }
}
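
// Illustrative sketch (hypothetical helper, not part of this module's API):
// each even-indexed element is written both to its own slot and to the odd
// slot above it. Assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_moveldup_ps() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let r = _mm512_moveldup_ps(a);
    let e = _mm512_setr_ps(
        0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0, 8.0, 8.0, 10.0, 10.0, 12.0, 12.0, 14.0, 14.0,
    );
    assert_eq!(_mm512_cmpeq_ps_mask(r, e), 0xffff);
}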

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
    unsafe {
        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
    }
}
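
// Illustrative sketch (hypothetical helper, not part of this module's API):
// the odd-indexed counterpart of _mm512_moveldup_ps. Assumes AVX-512F at
// runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_movehdup_ps() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let r = _mm512_movehdup_ps(a);
    let e = _mm512_setr_ps(
        1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0, 9.0, 9.0, 11.0, 11.0, 13.0, 13.0, 15.0, 15.0,
    );
    assert_eq!(_mm512_cmpeq_ps_mask(r, e), 0xffff);
}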

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    unsafe {
        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
    }
}
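
// Illustrative sketch (hypothetical helper, not part of this module's API):
// for doubles, each even-indexed element is duplicated into the odd slot
// above it. Assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_movedup_pd() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let r = _mm512_movedup_pd(a);
    let e = _mm512_setr_pd(0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0);
    assert_eq!(_mm512_cmpeq_pd_mask(r, e), 0xff);
}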
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25393#[inline]
25394#[target_feature(enable = "avx512f")]
25395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25398    unsafe {
25399        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25400        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25401    }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25407#[inline]
25408#[target_feature(enable = "avx512f")]
25409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25412    unsafe {
25413        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25414        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25415    }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
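///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(0);
///     let b = _mm_setr_epi32(1, 2, 3, 4);
///     // IMM8 = 2 selects the third 128-bit lane, so elements 8..12 of
///     // the result hold `b` and every other element comes from `a`.
///     let r = _mm512_inserti32x4::<2>(a, b);
/// }
/// ```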
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] // should be vinserti32x4
#[rustc_legacy_const_generics(2)]
pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        let b = _mm512_castsi128_si512(b).as_i32x16();
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
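///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm512_set1_epi32(0);
///     let b = _mm_setr_epi32(1, 2, 3, 4);
///     // `b` lands in elements 8..12 of the temporary, but only elements
///     // 8 and 9 survive the writemask; all others are copied from `src`.
///     let r = _mm512_mask_inserti32x4::<2>(src, 0b00000011_00000000, a, b);
/// }
/// ```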
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) // should be vinserti32x4
)]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x8();
        let b = _mm256_castsi128_si256(b).as_i32x8();
        let ret: i32x8 = match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        };
        transmute(ret)
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
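///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64(0);
///     let b = _mm256_setr_epi64x(1, 2, 3, 4);
///     // IMM8 = 1 replaces the upper 256 bits (elements 4..8) with `b`.
///     let r = _mm512_inserti64x4::<1>(a, b);
///     // r == [0, 0, 0, 0, 1, 2, 3, 4]
/// }
/// ```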
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] // should be vinserti64x4
#[rustc_legacy_const_generics(2)]
pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castsi256_si512(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
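///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(0.0);
///     let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///     // IMM8 = 1 replaces the second 128-bit lane (elements 4..8) with `b`.
///     let r = _mm512_insertf32x4::<1>(a, b);
/// }
/// ```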
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_castps128_ps512(b);
        match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m128,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) // should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_castps128_ps256(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m128,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
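///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(0.0);
///     let b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     // IMM8 = 0 replaces the lower 256 bits (elements 0..4) with `b`.
///     let r = _mm512_insertf64x4::<0>(a, b);
///     // r == [1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0]
/// }
/// ```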
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castpd256_pd512(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m256d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
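///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
///     // Within each 128-bit lane the two high elements of `a` and `b`
///     // interleave, so the first lane of `r` is [2, 18, 3, 19].
///     let r = _mm512_unpackhi_epi32(a, b);
/// }
/// ```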
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))] // should be vpunpckhdq
pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        );
        transmute(r)
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
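///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
///     // The high element of each 128-bit lane of `a` pairs with the
///     // corresponding high element of `b`.
///     let r = _mm512_unpackhi_epi64(a, b);
///     // r == [1, 9, 3, 11, 5, 13, 7, 15]
/// }
/// ```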
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] // should be vpunpckhqdq
pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
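///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
///     // Within each 128-bit lane the two low elements of `a` and `b`
///     // interleave, so the first lane of `r` is [0, 16, 1, 17].
///     let r = _mm512_unpacklo_epi32(a, b);
/// }
/// ```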
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))] // should be vpunpckldq
pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 0, 16, 1, 17,
              0 + 4, 16 + 4, 1 + 4, 17 + 4,
              0 + 8, 16 + 8, 1 + 8, 17 + 8,
              0 + 12, 16 + 12, 1 + 12, 17 + 12],
        );
        transmute(r)
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))] // should be vpunpcklqdq
pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
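///
/// # Examples
///
/// A minimal usage sketch, not taken from Intel's documentation; `demo` is a
/// hypothetical caller compiled with `avx512f` (marked `ignore` because
/// doctests cannot assume AVX-512 hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every 128-bit lane of `r` becomes [1.0, 2.0, 1.0, 2.0].
///     let r = _mm512_unpacklo_ps(a, b);
/// }
/// ```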
26470#[inline]
26471#[target_feature(enable = "avx512f")]
26472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26473#[cfg_attr(test, assert_instr(vunpcklps))]
26474pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26475    unsafe {
26476        #[rustfmt::skip]
26477        simd_shuffle!(a, b,
26478                       [ 0, 16, 1, 17,
26479                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26480                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26481                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26482        )
26483    }
26484}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}
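
// Editorial sketch, not part of the original source: for doubles each 128-bit
// lane holds two elements, so the `0 + 2n` / `8 + 2n` indices above take the
// low element of lane n alternately from `a` and `b`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_unpacklo_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    // r = [0, 8, 2, 10, 4, 12, 6, 14]
    let r = _mm512_unpacklo_pd(a, b);
    let _ = r;
}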

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
    }
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
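
// Editorial sketch, not part of the original source: after this cast only the
// low 128 bits carry defined data, so restrict further reads to that part (or
// use `_mm512_zextps128_ps512` below when the upper elements must be zero).
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_castps128_ps512() {
    let narrow = _mm_set1_ps(1.0);
    let wide = _mm512_castps128_ps512(narrow);
    // Round-tripping through the low 128 bits is well defined.
    let back = _mm512_castps512_ps128(wide);
    let _ = back;
}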

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm_set1_ps(0.),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_set1_ps(0.),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
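
// Editorial sketch, not part of the original source: the difference between
// the `zext` and `cast` widening intrinsics is only in the upper elements;
// `zext` guarantees zeros there, `cast` leaves them unspecified.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_zext_vs_cast() {
    let v = _mm256_set1_ps(3.0);
    let zeroed = _mm512_zextps256_ps512(v); // elements 8..16 are 0.0
    let undef = _mm512_castps256_ps512(v); // elements 8..16 are unspecified
    let _ = (zeroed, undef);
}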

/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
}
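
// Editorial sketch, not part of the original source: casts between equally
// sized vector types reinterpret bits without conversion, which allows
// bit-level tricks such as clearing the sign bit of every float.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_bit_cast() {
    let x = _mm512_set1_ps(-2.5);
    let bits = _mm512_castps_si512(x);
    // Mask off bit 31 of each 32-bit element: a bitwise `abs`.
    let cleared = _mm512_and_si512(bits, _mm512_set1_epi32(0x7fff_ffff));
    let abs = _mm512_castsi512_ps(cleared); // all elements are 2.5
    let _ = abs;
}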

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}

/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}

/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
}

/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}
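
// Editorial sketch, not part of the original source: the extraction reads
// element 0, i.e. the first argument of `_mm512_setr_epi32` (or the last of
// `_mm512_set_epi32`).
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cvtsi512_si32() {
    let v = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let lo = _mm512_cvtsi512_si32(v); // 7
    let _ = lo;
}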

/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i32x16();
        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
    }
}
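
// Editorial sketch, not part of the original source: only element 0 of the
// 128-bit source is replicated; the other three source elements are ignored.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_broadcastd_epi32() {
    let a = _mm_setr_epi32(42, 1, 2, 3);
    let r = _mm512_broadcastd_epi32(a); // all 16 elements are 42
    let _ = r;
}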

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}
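
// Editorial sketch, not part of the original source: unlike
// `_mm512_broadcastd_epi32`, which replicates one element, this replicates
// the whole 128-bit block into each of the four lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_broadcast_i32x4() {
    let a = _mm_setr_epi32(1, 2, 3, 4);
    // r = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
    let r = _mm512_broadcast_i32x4(a);
    let _ = r;
}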

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
#[inline]
27518#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
27519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27520pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27521    unsafe {
27522        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27523        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27524    }
27525}
27526
27527/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27528///
27529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27530#[inline]
27531#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
27532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27533pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27534    unsafe {
27535        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27536        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27537    }
27538}
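
// The double-precision variants follow the same pattern; a hypothetical
// sketch of the writemask form, where cleared mask bits keep lanes of `src`:
//
//     let quad = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
//     // lanes 0..4 receive the broadcast, lanes 4..8 are copied from `src`
//     let r = _mm512_mask_broadcast_f64x4(src, 0b0000_1111, quad);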

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
}
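
// Blend semantics sketch (hypothetical values): a set bit i picks lane i from
// `b`, a cleared bit keeps lane i from `a`, so an alternating mask
// interleaves the two sources:
//
//     let a = _mm512_set1_epi32(0);
//     let b = _mm512_set1_epi32(1);
//     // even lanes come from `b`, odd lanes from `a`
//     let r = _mm512_mask_blend_epi32(0b0101_0101_0101_0101, a, b);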

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
}

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
}
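
// The floating-point blends above follow the same rule as the integer ones
// (bit i of `k` selects lane i of `b`); a hypothetical double-precision
// sketch:
//
//     let lo = _mm512_set1_pd(-1.0);
//     let hi = _mm512_set1_pd(1.0);
//     // upper four lanes come from `hi`, lower four from `lo`
//     let r = _mm512_mask_blend_pd(0b1111_0000, lo, hi);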

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>4 bits</strong> of the immediate are used (shift at maximum by 60 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let imm8: i32 = IMM8 % 16;
        let r: i32x16 = match imm8 {
            0 => simd_shuffle!(
                a,
                b,
                [
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                a,
                b,
                [
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
                ],
            ),
            2 => simd_shuffle!(
                a,
                b,
                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
            ),
            3 => simd_shuffle!(
                a,
                b,
                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
            ),
            4 => simd_shuffle!(
                a,
                b,
                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
            ),
            5 => simd_shuffle!(
                a,
                b,
                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
            ),
            6 => simd_shuffle!(
                a,
                b,
                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
            ),
            7 => simd_shuffle!(
                a,
                b,
                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
            ),
            8 => simd_shuffle!(
                a,
                b,
                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
            ),
            9 => simd_shuffle!(
                a,
                b,
                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
            ),
            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
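
// Worked example of the concatenate-and-shift above (hypothetical values):
// with IMM8 = 1 the 32-element concatenation [b, a] is shifted right by one
// 32-bit lane, so dst = [b[1], ..., b[15], a[0]]:
//
//     let a = _mm512_set1_epi32(100);
//     let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let r = _mm512_alignr_epi32::<1>(a, b); // [1, 2, ..., 15, 100]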

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 28 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let imm8: i32 = IMM8 % 8;
        let r: i32x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 12 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
#[rustc_legacy_const_generics(2)]
pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let imm8: i32 = IMM8 % 4;
        let r: i32x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
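
// At 128-bit width only the low two bits of IMM8 matter (IMM8 % 4), so e.g.
// IMM8 = 5 behaves exactly like IMM8 = 1 (hypothetical values):
//
//     let a = _mm_setr_epi32(4, 5, 6, 7);
//     let b = _mm_setr_epi32(0, 1, 2, 3);
//     let r = _mm_alignr_epi32::<5>(a, b); // same as ::<1>: [1, 2, 3, 4]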

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 56 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 8;
        let r: i64x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 24 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 4;
        let r: i64x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
///
/// <div class="warning">Only the lowest <strong>bit</strong> of the immediate is used (shift at maximum by 8 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
#[rustc_legacy_const_generics(2)]
pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 2;
        let r: i64x2 = match imm8 {
            0 => simd_shuffle!(a, b, [2, 3]),
            1 => simd_shuffle!(a, b, [3, 0]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
}

/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
    }
}
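
// Masked AND sketch (hypothetical values): selected lanes get `a & b`; the
// maskz form zeroes the rest, while the mask form copies them from `src`:
//
//     let a = _mm512_set1_epi32(0b1100);
//     let b = _mm512_set1_epi32(0b1010);
//     // lanes 8..16 = 0b1000, lanes 0..8 = 0
//     let r = _mm512_maskz_and_epi32(0xFF00, a, b);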

/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
    }
}

/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
    }
}

/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
    }
}

/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
}
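
// Note: `_mm512_and_si512` is the whole-register form. For an unmasked AND it
// is interchangeable with `_mm512_and_epi32`/`_mm512_and_epi64`, since a
// bitwise AND has no per-lane behavior; the element-typed variants only
// matter once a writemask selects 32- or 64-bit lanes.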

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
    }
}
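
// OR with a writemask (illustrative; `src`, `a`, and `b` are hypothetical):
// selected lanes take `a | b`, the rest keep the lanes of `src`:
//
//     let r = _mm512_mask_or_epi32(src, 0x000F, a, b); // only lanes 0..4 updated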

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vpord
pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vpord
pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] //should be vporq
pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
    }
}

/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
    }
}
28596
28597/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28598///
28599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28600#[inline]
28601#[target_feature(enable = "avx512f,avx512vl")]
28602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28603#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28604pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28605    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28606}
28607
28608/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28609///
28610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28611#[inline]
28612#[target_feature(enable = "avx512f,avx512vl")]
28613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28614#[cfg_attr(test, assert_instr(vpxord))]
28615pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28616    unsafe {
28617        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28618        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28619    }
28620}
28621
28622/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28623///
28624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28625#[inline]
28626#[target_feature(enable = "avx512f,avx512vl")]
28627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28628#[cfg_attr(test, assert_instr(vpxord))]
28629pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28630    unsafe {
28631        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28632        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28633    }
28634}
28635
28636/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28637///
28638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28639#[inline]
28640#[target_feature(enable = "avx512f,avx512vl")]
28641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28642#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28643pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28644    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28645}
28646
28647/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28648///
28649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28650#[inline]
28651#[target_feature(enable = "avx512f,avx512vl")]
28652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28653#[cfg_attr(test, assert_instr(vpxord))]
28654pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28655    unsafe {
28656        let xor = _mm_xor_epi32(a, b).as_i32x4();
28657        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28658    }
28659}
28660
28661/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28662///
28663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28664#[inline]
28665#[target_feature(enable = "avx512f,avx512vl")]
28666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28667#[cfg_attr(test, assert_instr(vpxord))]
28668pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28669    unsafe {
28670        let xor = _mm_xor_epi32(a, b).as_i32x4();
28671        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28672    }
28673}
28674
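// Editor's sketch (hypothetical helper): the writemask (`_mask_`) variants
// above copy lanes from `src` wherever the corresponding mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_xor_epi32_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    let src = _mm512_set1_epi32(-1);
    // Mask 0x00FF: lanes 0..8 take a ^ b (= 0b0110); lanes 8..16 copy `src`.
    let r = _mm512_mask_xor_epi32(src, 0x00FF, a, b);
    assert_eq!(
        _mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0110)) & 0x00FF,
        0x00FF
    );
    assert_eq!(_mm512_cmpeq_epi32_mask(r, src) & 0xFF00, 0xFF00);
}
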
/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxorq
pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxorq
pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
    }
}

/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))] // should be vpandnd
pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
    }
}

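// Editor's sketch (hypothetical helper): `andnot` computes `(!a) & b`, which
// is exactly how the masked forms above synthesize it (XOR against all-ones,
// then AND).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn andnot_epi32_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // (!0b1100) & 0b1010 == 0b0010 in every lane.
    let r = _mm512_andnot_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xFFFF);
}
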
/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
    }
}

/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
    a as u32
}

/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
    a as __mmask16
}

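// Editor's sketch (hypothetical helper): both conversions are plain integer
// casts, so values that fit in 16 bits round-trip losslessly and wider values
// are truncated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtmask16_roundtrip_sketch() {
    let k: __mmask16 = 0b1000_0000_0000_0001;
    assert_eq!(_cvtu32_mask16(_cvtmask16_u32(k)), k);
    // The upper 16 bits of the u32 are discarded on the way back.
    assert_eq!(_cvtu32_mask16(0xFFFF_0001), 0x0001);
}
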
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}

/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
    a ^ 0b11111111_11111111
}

/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
    a ^ 0b11111111_11111111
}

/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and/not code instead of kandnw
pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and/not code instead of kandnw
pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor/not code instead of kxnorw
pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor/not code instead of kxnorw
pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

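// Editor's sketch (hypothetical helper): the 16-bit mask operations above are
// ordinary integer bit logic, so the usual Boolean identities hold exactly.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask16_logic_sketch() {
    let a: __mmask16 = 0b0011_0101_1111_0000;
    let b: __mmask16 = 0b0101_0011_0000_1111;
    assert_eq!(_kandn_mask16(a, b), !a & b);
    assert_eq!(_kxnor_mask16(a, b), !(a ^ b));
    assert_eq!(_mm512_kand(a, _mm512_knot(a)), 0);
}
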
/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask16(a, b);
    *all_ones = (tmp == 0xffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kor_mask16(a, b) == 0xffff) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kor_mask16(a, b) == 0) as u8
}

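// Editor's sketch (hypothetical helper): `_kortest_mask16_u8` reports both
// conditions at once; the `c`/`z` variants each report one of them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn kortest_mask16_sketch() {
    let mut all_ones = 0u8;
    // 0x00FF | 0xFF00 == 0xFFFF: not zero, but all ones.
    let zf = _kortest_mask16_u8(0x00FF, 0xFF00, &mut all_ones);
    assert_eq!((zf, all_ones), (0, 1));
    assert_eq!(_kortestc_mask16_u8(0x00FF, 0xFF00), 1);
    assert_eq!(_kortestz_mask16_u8(0, 0), 1);
}
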
/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    a.unbounded_shl(COUNT)
}

/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    a.unbounded_shr(COUNT)
}

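// Editor's sketch (hypothetical helper): because the shifts are implemented
// with `unbounded_shl`/`unbounded_shr`, a COUNT of 16 or more produces an
// all-zero mask instead of wrapping.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kshift_mask16_sketch() {
    assert_eq!(_kshiftli_mask16::<4>(0x000F), 0x00F0);
    assert_eq!(_kshiftri_mask16::<4>(0x00F0), 0x000F);
    assert_eq!(_kshiftli_mask16::<16>(0xFFFF), 0);
}
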
/// Load 16-bit mask from memory.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    *mem_addr
}

/// Store 16-bit mask to memory.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
    *mem_addr = a;
}

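// Editor's sketch (hypothetical helper): a round trip through memory. The
// pointers must be valid for a 16-bit read/write; a stack slot suffices.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn load_store_mask16_sketch() {
    let mut slot: __mmask16 = 0;
    _store_mask16(&mut slot, 0b1010_0101_1010_0101);
    assert_eq!(_load_mask16(&slot), 0b1010_0101_1010_0101);
}
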
/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    a
}

/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
    mask as u16
}

/// Converts bit mask k1 into an integer value, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
    k1 as i32
}

/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
    ((a & 0xff) << 8) | (b & 0xff)
}

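// Editor's sketch (hypothetical helper): only the low byte of each argument
// to `_mm512_kunpackb` survives, with `a`'s byte landing in the high half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kunpackb_sketch() {
    let a: __mmask16 = 0xFF_AB; // high byte is discarded
    let b: __mmask16 = 0xFF_CD; // high byte is discarded
    assert_eq!(_mm512_kunpackb(a, b), 0xABCD);
}
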
/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
    let r = (a | b) == 0b11111111_11111111;
    r as i32
}

/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
    let r = (a | b) == 0;
    r as i32
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
}

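// Editor's sketch (hypothetical helper): the `test` family sets a lane's mask
// bit whenever `a & b` has any bit set in that lane, mirroring `vptestmd`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn test_epi32_mask_sketch() {
    let a = _mm512_set1_epi32(0b0110);
    let b = _mm512_set1_epi32(0b0010); // shares bit 1 with `a`
    let c = _mm512_set1_epi32(0b1001); // disjoint from `a`
    assert_eq!(_mm512_test_epi32_mask(a, b), 0xFFFF);
    assert_eq!(_mm512_test_epi32_mask(a, c), 0);
}
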
29384/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29385///
29386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29387#[inline]
29388#[target_feature(enable = "avx512f")]
29389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29390#[cfg_attr(test, assert_instr(vptestmq))]
29391pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29392    let and = _mm512_and_epi64(a, b);
29393    let zero = _mm512_setzero_si512();
29394    _mm512_cmpneq_epi64_mask(and, zero)
29395}
29396
29397/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29398///
29399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29400#[inline]
29401#[target_feature(enable = "avx512f")]
29402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29403#[cfg_attr(test, assert_instr(vptestmq))]
29404pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29405    let and = _mm512_and_epi64(a, b);
29406    let zero = _mm512_setzero_si512();
29407    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29408}
29409
29410/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29411///
29412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29413#[inline]
29414#[target_feature(enable = "avx512f,avx512vl")]
29415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29416#[cfg_attr(test, assert_instr(vptestmq))]
29417pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29418    let and = _mm256_and_si256(a, b);
29419    let zero = _mm256_setzero_si256();
29420    _mm256_cmpneq_epi64_mask(and, zero)
29421}
29422
29423/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29424///
29425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29426#[inline]
29427#[target_feature(enable = "avx512f,avx512vl")]
29428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29429#[cfg_attr(test, assert_instr(vptestmq))]
29430pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29431    let and = _mm256_and_si256(a, b);
29432    let zero = _mm256_setzero_si256();
29433    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29434}
29435
29436/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29437///
29438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29439#[inline]
29440#[target_feature(enable = "avx512f,avx512vl")]
29441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29442#[cfg_attr(test, assert_instr(vptestmq))]
29443pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29444    let and = _mm_and_si128(a, b);
29445    let zero = _mm_setzero_si128();
29446    _mm_cmpneq_epi64_mask(and, zero)
29447}
29448
29449/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29450///
29451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29452#[inline]
29453#[target_feature(enable = "avx512f,avx512vl")]
29454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29455#[cfg_attr(test, assert_instr(vptestmq))]
29456pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29457    let and = _mm_and_si128(a, b);
29458    let zero = _mm_setzero_si128();
29459    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29460}
29461
29462/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29463///
29464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29465#[inline]
29466#[target_feature(enable = "avx512f")]
29467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29468#[cfg_attr(test, assert_instr(vptestnmd))]
29469pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29470    let and = _mm512_and_epi32(a, b);
29471    let zero = _mm512_setzero_si512();
29472    _mm512_cmpeq_epi32_mask(and, zero)
29473}
29474
29475/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29476///
29477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29478#[inline]
29479#[target_feature(enable = "avx512f")]
29480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29481#[cfg_attr(test, assert_instr(vptestnmd))]
29482pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29483    let and = _mm512_and_epi32(a, b);
29484    let zero = _mm512_setzero_si512();
29485    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29486}
29487
29488/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29489///
29490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29491#[inline]
29492#[target_feature(enable = "avx512f,avx512vl")]
29493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29494#[cfg_attr(test, assert_instr(vptestnmd))]
29495pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29496    let and = _mm256_and_si256(a, b);
29497    let zero = _mm256_setzero_si256();
29498    _mm256_cmpeq_epi32_mask(and, zero)
29499}
29500
29501/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29502///
29503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29504#[inline]
29505#[target_feature(enable = "avx512f,avx512vl")]
29506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29507#[cfg_attr(test, assert_instr(vptestnmd))]
29508pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29509    let and = _mm256_and_si256(a, b);
29510    let zero = _mm256_setzero_si256();
29511    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29512}
29513
29514/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29515///
29516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29517#[inline]
29518#[target_feature(enable = "avx512f,avx512vl")]
29519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29520#[cfg_attr(test, assert_instr(vptestnmd))]
29521pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29522    let and = _mm_and_si128(a, b);
29523    let zero = _mm_setzero_si128();
29524    _mm_cmpeq_epi32_mask(and, zero)
29525}
29526
29527/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29528///
29529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29530#[inline]
29531#[target_feature(enable = "avx512f,avx512vl")]
29532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29533#[cfg_attr(test, assert_instr(vptestnmd))]
29534pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29535    let and = _mm_and_si128(a, b);
29536    let zero = _mm_setzero_si128();
29537    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29538}
29539
29540/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29541///
29542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29543#[inline]
29544#[target_feature(enable = "avx512f")]
29545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29546#[cfg_attr(test, assert_instr(vptestnmq))]
29547pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
}
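
// Illustrative usage sketch (added here; not part of the upstream source):
// lane i of a `testn` mask is set exactly when `a & b` is zero in that lane.
// The helper name is hypothetical, and the function assumes it only ever runs
// on an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _testn_epi64_example() -> __mmask8 {
    // The two bit patterns share no set bits, so `a & b` is zero in every
    // lane and all eight mask bits come back set (0xff).
    let a = _mm512_set1_epi64(0b1010);
    let b = _mm512_set1_epi64(0b0101);
    _mm512_testn_epi64_mask(a, b)
}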

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}
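
// Sketch (added for illustration; not upstream code) of the store-then-fence
// protocol the safety sections above require. The helper name is
// hypothetical; `mem_addr` must be 64-byte aligned and valid for 64 bytes of
// writes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _stream_store_example(mem_addr: *mut f32) {
    unsafe {
        // Non-temporal store of sixteen f32 lanes, bypassing the cache.
        _mm512_stream_ps(mem_addr, _mm512_set1_ps(1.0));
        // Publish the store before this or any other thread touches the
        // destination again.
        _mm_sfence();
    }
}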

/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
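
// Minimal sketch (added; not upstream code) of pairing the non-temporal load
// with its alignment contract. The helper name is hypothetical; `mem_addr`
// must be 64-byte aligned and valid for a 64-byte read.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _stream_load_example(mem_addr: *const __m512i) -> __m512i {
    // Reads 64 bytes with a hint that the data will not be reused soon.
    unsafe { _mm512_stream_load_si512(mem_addr) }
}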

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    )
}

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
/// reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    unsafe {
        let r = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
    }
}
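
// Worked example (added; not upstream code) of the argument-order difference
// between `_mm512_set_ps` and `_mm512_setr_ps`. The helper name is
// hypothetical and assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _set_vs_setr_example() -> bool {
    // `_mm512_set_ps` lists arguments from the highest lane down to lane 0,
    // while `_mm512_setr_ps` lists them starting at lane 0, so these two
    // vectors are identical lane for lane and the full mask is returned.
    let hi_first = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let lo_first = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    _mm512_cmpeq_ps_mask(hi_first, lo_first) == 0xffff
}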

/// Broadcast 64-bit float `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
}

/// Broadcast 32-bit float `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
}

/// Sets packed 32-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Broadcast 8-bit integer a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
}

/// Broadcast 32-bit integer `a` to all elements of `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}
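
// Sketch (added; not upstream code) contrasting the writemask and zeromask
// broadcast variants above. The helper name is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _masked_broadcast_example() -> (__m512i, __m512i) {
    let src = _mm512_set1_epi32(-1);
    // With mask 0x00ff the low 8 lanes receive the broadcast value 7; the
    // high 8 lanes keep `src` (writemask) or become zero (zeromask).
    let merged = _mm512_mask_set1_epi32(src, 0x00ff, 7);
    let zeroed = _mm512_maskz_set1_epi32(0x00ff, 7);
    (merged, zeroed)
}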

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
    unsafe {
        let r = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
    unsafe {
        let r = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Broadcast 64-bit integer `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
    unsafe {
        let r = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
    unsafe {
        let r = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
    }
}

/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
}

/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
}
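
// Worked example (added; not upstream code): `_mm512_set4_epi64(d, c, b, a)`
// places `a` in lane 0, so it matches `_mm512_setr4_epi64(a, b, c, d)` lane
// for lane. The helper name is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _set4_layout_example() -> bool {
    let x = _mm512_set4_epi64(4, 3, 2, 1);
    let y = _mm512_setr4_epi64(1, 2, 3, 4);
    // All eight 64-bit lanes compare equal, so the mask is 0xff.
    _mm512_cmpeq_epi64_mask(x, y) == 0xff
}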

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}
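
// Note (added; not upstream code): the named comparisons above are thin
// wrappers over `_mm512_cmp_ps_mask` with a fixed predicate, as this
// hypothetical helper demonstrates.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _cmp_predicate_example(a: __m512, b: __m512) -> bool {
    // `_mm512_cmplt_ps_mask` is exactly `_CMP_LT_OS`, so the two masks agree
    // for every input, NaNs included.
    _mm512_cmplt_ps_mask(a, b) == _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}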

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r = vcmpps256(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r = vcmpps256(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vcmpps128(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vcmpps128(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
    m: __mmask16,
    a: __m512,
    b: __m512,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM5, m as i16, SAE);
        r.cast_unsigned()
    }
}
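
// Sketch (added; not upstream code) of suppressing exception signaling with
// the `SAE` parameter. The helper name is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _cmp_sae_example(a: __m512, b: __m512) -> __mmask16 {
    // _MM_FROUND_NO_EXC suppresses floating-point exception signaling for
    // this comparison; the resulting mask is unchanged.
    _mm512_cmp_round_ps_mask::<_CMP_LE_OS, _MM_FROUND_NO_EXC>(a, b)
}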

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
}
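
// Worked example (added; not upstream code): every lane is either ordered
// (neither input NaN) or unordered (at least one NaN), so the two masks above
// are exact bitwise complements. The helper name is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _ord_unord_example(a: __m512, b: __m512) -> bool {
    _mm512_cmpord_ps_mask(a, b) == !_mm512_cmpunord_ps_mask(a, b)
}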

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmppd
pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}
30560
30561/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30562///
30563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30564#[inline]
30565#[target_feature(enable = "avx512f,avx512vl")]
30566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30567#[rustc_legacy_const_generics(2)]
30568#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30569pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30570    unsafe {
30571        static_assert_uimm_bits!(IMM8, 5);
30572        let neg_one = -1;
30573        let a = a.as_f64x4();
30574        let b = b.as_f64x4();
30575        let r = vcmppd256(a, b, IMM8, neg_one);
30576        r.cast_unsigned()
30577    }
30578}
30579
30580/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30581///
30582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30583#[inline]
30584#[target_feature(enable = "avx512f,avx512vl")]
30585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30586#[rustc_legacy_const_generics(3)]
30587#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30588pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30589    unsafe {
30590        static_assert_uimm_bits!(IMM8, 5);
30591        let a = a.as_f64x4();
30592        let b = b.as_f64x4();
30593        let r = vcmppd256(a, b, IMM8, k1 as i8);
30594        r.cast_unsigned()
30595    }
30596}
30597
30598/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30599///
30600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30601#[inline]
30602#[target_feature(enable = "avx512f,avx512vl")]
30603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30604#[rustc_legacy_const_generics(2)]
30605#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30606pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30607    unsafe {
30608        static_assert_uimm_bits!(IMM8, 5);
30609        let neg_one = -1;
30610        let a = a.as_f64x2();
30611        let b = b.as_f64x2();
30612        let r = vcmppd128(a, b, IMM8, neg_one);
30613        r.cast_unsigned()
30614    }
30615}
30616
30617/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30618///
30619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30620#[inline]
30621#[target_feature(enable = "avx512f,avx512vl")]
30622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30623#[rustc_legacy_const_generics(3)]
30624#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30625pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30626    unsafe {
30627        static_assert_uimm_bits!(IMM8, 5);
30628        let a = a.as_f64x2();
30629        let b = b.as_f64x2();
30630        let r = vcmppd128(a, b, IMM8, k1 as i8);
30631        r.cast_unsigned()
30632    }
30633}
30634
30635/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30636/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30637///
30638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30639#[inline]
30640#[target_feature(enable = "avx512f")]
30641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30642#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30643#[rustc_legacy_const_generics(2, 3)]
30644pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30645    a: __m512d,
30646    b: __m512d,
30647) -> __mmask8 {
30648    unsafe {
30649        static_assert_uimm_bits!(IMM5, 5);
30650        static_assert_mantissas_sae!(SAE);
30651        let neg_one = -1;
30652        let a = a.as_f64x8();
30653        let b = b.as_f64x8();
30654        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30655        r.cast_unsigned()
30656    }
30657}
30658
30659/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30660/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30661///
30662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30663#[inline]
30664#[target_feature(enable = "avx512f")]
30665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30666#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30667#[rustc_legacy_const_generics(3, 4)]
30668pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30669    k1: __mmask8,
30670    a: __m512d,
30671    b: __m512d,
30672) -> __mmask8 {
30673    unsafe {
30674        static_assert_uimm_bits!(IMM5, 5);
30675        static_assert_mantissas_sae!(SAE);
30676        let a = a.as_f64x8();
30677        let b = b.as_f64x8();
30678        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30679        r.cast_unsigned()
30680    }
30681}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
}
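
// For any pair of inputs, the ordered and unordered masks partition the
// lanes: each lane is set in exactly one of the two results. A sketch with
// one NaN lane (the constants are arbitrary; the helper is hypothetical):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmpord_cmpunord_pd_mask() -> (__mmask8, __mmask8) {
    let a = _mm512_setr_pd(f64::NAN, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
    let b = _mm512_set1_pd(2.0);
    // Lane 0 holds a NaN, so `ord` is 0b1111_1110 and `unord` is 0b0000_0001.
    let ord = _mm512_cmpord_pd_mask(a, b);
    let unord = _mm512_cmpunord_pd_mask(a, b);
    (ord, unord)
}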

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}
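
// Only the lowest element takes part in the scalar compare, so at most bit 0
// of the returned mask is set. A sketch using `_CMP_EQ_OQ` (an arbitrary
// predicate choice; the helper name is hypothetical):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmp_ss_mask() -> __mmask8 {
    // The lower elements are equal; the upper elements differ but are ignored.
    let a = _mm_set_ps(3.0, 2.0, 1.0, 0.5);
    let b = _mm_set_ps(9.0, 8.0, 7.0, 0.5);
    _mm_cmp_ss_mask::<_CMP_EQ_OQ>(a, b) // == 0b1
}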

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let r = vcmpss(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128,
    b: __m128,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
        r.cast_unsigned()
    }
}
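
// Sketch of the rounded, zeromasked scalar compare: with mask bit 0 clear the
// result is always 0, and `_MM_FROUND_NO_EXC` additionally keeps the NaN input
// from signalling an invalid-operation exception. The constants and the helper
// name are illustrative assumptions.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_cmp_round_ss_mask() -> __mmask8 {
    let a = _mm_set_ss(f32::NAN);
    let b = _mm_set_ss(1.0);
    _mm_mask_cmp_round_ss_mask::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(0, a, b) // == 0
}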

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
        r.cast_unsigned()
    }
}
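
// The double-precision scalar forms mirror the `ss` ones: only bit 0 of the
// result is meaningful. A sketch using the unordered predicate `_CMP_UNORD_Q`
// (an arbitrary choice; the helper is hypothetical) so a NaN input sets it:
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmp_round_sd_mask() -> __mmask8 {
    let a = _mm_set_sd(f64::NAN);
    let b = _mm_set_sd(1.0);
    _mm_cmp_round_sd_mask::<_CMP_UNORD_Q, _MM_FROUND_NO_EXC>(a, b) // == 0b1
}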

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
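
// The `epu32` predicates compare lanes as unsigned values, so an all-ones
// lane is the maximum rather than -1. A sketch of the 512-bit less-than
// (constants and helper name are illustrative):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmplt_epu32_mask() -> __mmask16 {
    let a = _mm512_set1_epi32(0);
    let b = _mm512_set1_epi32(-1); // reads as u32::MAX lane-wise
    _mm512_cmplt_epu32_mask(a, b) // == 0xffff: 0 < u32::MAX in every lane
}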

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
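
// Greater-than is the NLE ("not less-or-equal") predicate of the generic
// compare, so the named shorthand and the generic form agree. A sketch
// (arbitrary constants, hypothetical helper):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmpgt_epu32_mask() -> (__mmask16, __mmask16) {
    let a = _mm512_set1_epi32(5);
    let b = _mm512_set1_epi32(3);
    let shorthand = _mm512_mask_cmpgt_epu32_mask(0xffff, a, b);
    let generic = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(0xffff, a, b);
    (shorthand, generic) // both 0xffff
}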

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
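
// Greater-or-equal is NLT ("not less-than") in `_MM_CMPINT_ENUM` terms; on
// equal inputs it sets the lane while plain greater-than does not. A sketch
// (arbitrary constants, hypothetical helper):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmpge_epu32_mask() -> __mmask16 {
    let a = _mm512_set1_epi32(7);
    let b = _mm512_set1_epi32(7);
    _mm512_cmpge_epu32_mask(a, b) // == 0xffff, while cmpgt would give 0
}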

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
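
// Equality does not depend on signedness, so the `epu32` and `epi32` forms
// return the same mask for the same lane bits. A sketch with two differing
// lanes (constants and helper name are illustrative):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmpeq_epu32_mask() -> __mmask16 {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm512_setr_epi32(1, 0, 3, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    _mm512_cmpeq_epu32_mask(a, b) // == 0xfff5: bits 1 and 3 are clear
}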

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
        simd_bitmask(r)
    }
}
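
// Predicates 3 (`_MM_CMPINT_FALSE`) and 7 (`_MM_CMPINT_TRUE`) ignore the
// operands entirely, which the match arms above implement as constant
// all-zero / all-ones vectors. A sketch (constants and helper name are
// illustrative):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmp_epu32_mask_const_predicates() -> (__mmask16, __mmask16) {
    let a = _mm512_set1_epi32(123);
    let b = _mm512_set1_epi32(456);
    let never = _mm512_cmp_epu32_mask::<_MM_CMPINT_FALSE>(a, b); // 0x0000
    let always = _mm512_cmp_epu32_mask::<_MM_CMPINT_TRUE>(a, b); // 0xffff
    (never, always)
}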

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x16();
        let b = b.as_u32x16();
        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x8();
        let b = b.as_u32x8();
        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x4();
        let b = b.as_u32x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u32x4();
        let b = b.as_u32x4();
        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
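
// The named 128-bit shorthands above all funnel into this generic form; for
// instance less-or-equal is predicate `_MM_CMPINT_LE`. A sketch (constants,
// mask, and helper name are illustrative):
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _example_mask_cmp_epu32_mask() -> __mmask8 {
    let a = _mm_set1_epi32(2);
    let b = _mm_set1_epi32(2);
    // Equal lanes satisfy `<=`; only the low four mask bits are meaningful
    // for 128-bit vectors, and lane 2 is suppressed by the zeromask.
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(0b1011, a, b) // == 0b1011
}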

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
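
// The signed forms interpret the same lane bits differently from `epu32`: an
// all-ones lane is -1, below zero. A sketch contrasting the two over the same
// data as the unsigned example above (helper name is hypothetical):
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmplt_epi32_mask() -> __mmask16 {
    let a = _mm512_set1_epi32(0);
    let b = _mm512_set1_epi32(-1);
    // Signed: 0 < -1 is false in every lane, so the mask is empty, whereas
    // the unsigned compare over the same bits returns 0xffff.
    _mm512_cmplt_epi32_mask(a, b) // == 0
}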
31513
31514/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31515///
31516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31517#[inline]
31518#[target_feature(enable = "avx512f")]
31519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31520#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31521pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31522    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31523}
31524
31525/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31526///
31527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31528#[inline]
31529#[target_feature(enable = "avx512f")]
31530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31531#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31532pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31533    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31534}
31535
31536/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31537///
31538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31539#[inline]
31540#[target_feature(enable = "avx512f,avx512vl")]
31541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31542#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31543pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31544    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31545}
31546
31547/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31548///
31549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31550#[inline]
31551#[target_feature(enable = "avx512f,avx512vl")]
31552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31553#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31554pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31555    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31556}
31557
31558/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31559///
31560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31561#[inline]
31562#[target_feature(enable = "avx512f,avx512vl")]
31563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31564#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31565pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31566    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31567}
31568
31569/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31570///
31571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31572#[inline]
31573#[target_feature(enable = "avx512f,avx512vl")]
31574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31575#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31576pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31577    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31578}
31579
31580/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31581///
31582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31583#[inline]
31584#[target_feature(enable = "avx512f")]
31585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31586#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31587pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31588    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31589}
31590
31591/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31592///
31593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31594#[inline]
31595#[target_feature(enable = "avx512f")]
31596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31597#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31598pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31599    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31600}
31601
31602/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31603///
31604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31605#[inline]
31606#[target_feature(enable = "avx512f,avx512vl")]
31607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31609pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31610    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31611}
31612
31613/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31614///
31615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31616#[inline]
31617#[target_feature(enable = "avx512f,avx512vl")]
31618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31619#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31620pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31621    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31622}
31623
31624/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31625///
31626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31627#[inline]
31628#[target_feature(enable = "avx512f,avx512vl")]
31629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31630#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31631pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31632    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31633}
31634
31635/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31636///
31637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31638#[inline]
31639#[target_feature(enable = "avx512f,avx512vl")]
31640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31641#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31642pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31643    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31644}
31645
31646/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31647///
31648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31649#[inline]
31650#[target_feature(enable = "avx512f")]
31651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31652#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31653pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31654    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31655}
31656
31657/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31658///
31659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31660#[inline]
31661#[target_feature(enable = "avx512f")]
31662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31663#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31664pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31665    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31666}
31667
31668/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31669///
31670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31671#[inline]
31672#[target_feature(enable = "avx512f,avx512vl")]
31673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31674#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31675pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31676    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31677}
31678
31679/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31680///
31681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31682#[inline]
31683#[target_feature(enable = "avx512f,avx512vl")]
31684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31685#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31686pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31687    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31688}
31689
31690/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31691///
31692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31693#[inline]
31694#[target_feature(enable = "avx512f,avx512vl")]
31695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31696#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31697pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31698    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31699}

/// Compare packed signed 32-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpd
pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
        simd_bitmask(r)
    }
}
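
// A sketch of the const-generic predicate selection (illustrative, not
// original source): `_MM_CMPINT_LE` picks the lane-wise `a <= b` arm above,
// while `_MM_CMPINT_FALSE` and `_MM_CMPINT_TRUE` hit the two constant arms.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mm512_cmp_epi32_mask() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b), 0xFFFF);
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_FALSE>(a, b), 0);
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_TRUE>(a, b), 0xFFFF);
}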

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
}
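
// Unsigned comparison reinterprets the lane bits (a sketch, not original
// source): `-1i64` becomes `u64::MAX`, so it is *not* less than 1 here,
// whereas the signed `_mm512_cmplt_epi64_mask` defined further down reports
// every lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mm512_cmplt_epu64_mask() {
    let a = _mm512_set1_epi64(-1);
    let b = _mm512_set1_epi64(1);
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00);
    assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xFF);
}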

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpuq
pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        simd_bitmask(r)
    }
}
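
// Sketch (not original source): predicate `_MM_CMPINT_NLT` ("not less than")
// through the generic form agrees with the direct greater-than-or-equal
// helper for unsigned 64-bit lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mm512_cmp_epu64_mask() {
    let a = _mm512_set1_epi64(3);
    let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    assert_eq!(
        _mm512_cmp_epu64_mask::<_MM_CMPINT_NLT>(a, b),
        _mm512_cmpge_epu64_mask(a, b)
    );
}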

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 64-bit integers in `a` and `b` based on the comparison operand specified by `imm8`, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_u64x2();
        let b = b.as_u64x2();
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
}
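
// Sketch (not original source): the masked variants funnel through the
// generic comparison with `_MM_CMPINT_NLE` ("not less than or equal"), which
// is exactly greater-than, so an all-ones `k1` reproduces the direct form.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mm512_cmpgt_epi64_mask() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_set1_epi64(3);
    let direct = _mm512_cmpgt_epi64_mask(a, b);
    let generic = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(0xFF, a, b);
    assert_eq!(direct, generic);
    assert_eq!(direct, 0b1111_0000);
}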

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] // should be vpcmpq
pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32784#[inline]
32785#[target_feature(enable = "avx512f,avx512vl")]
32786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32787#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32788pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32789    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32790}
32791
32792/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32793///
32794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32795#[inline]
32796#[target_feature(enable = "avx512f")]
32797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32798#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32799pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32800    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32801}
32802
32803/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32804///
32805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32806#[inline]
32807#[target_feature(enable = "avx512f")]
32808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32809#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32810pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32811    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32812}
32813
32814/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32815///
32816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32817#[inline]
32818#[target_feature(enable = "avx512f,avx512vl")]
32819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32820#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32821pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32822    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32823}
32824
32825/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32826///
32827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32828#[inline]
32829#[target_feature(enable = "avx512f,avx512vl")]
32830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32831#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32832pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32833    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32834}
32835
32836/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32837///
32838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32839#[inline]
32840#[target_feature(enable = "avx512f,avx512vl")]
32841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32842#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32843pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32844    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32845}
32846
32847/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32848///
32849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32850#[inline]
32851#[target_feature(enable = "avx512f,avx512vl")]
32852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32853#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32854pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32855    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32856}
32857
32858/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32859///
32860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32861#[inline]
32862#[target_feature(enable = "avx512f")]
32863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32864#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32865pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32866    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32867}
32868
32869/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32870///
32871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32872#[inline]
32873#[target_feature(enable = "avx512f")]
32874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32875#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32876pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32877    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32878}
32879
32880/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32881///
32882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32883#[inline]
32884#[target_feature(enable = "avx512f,avx512vl")]
32885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32886#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32887pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32888    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32889}
32890
32891/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32892///
32893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32894#[inline]
32895#[target_feature(enable = "avx512f,avx512vl")]
32896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32897#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32898pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32899    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32900}
32901
32902/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32903///
32904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32905#[inline]
32906#[target_feature(enable = "avx512f,avx512vl")]
32907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32908#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32909pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32910    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32911}
32912
32913/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32914///
32915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32916#[inline]
32917#[target_feature(enable = "avx512f,avx512vl")]
32918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32919#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32920pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32921    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32922}
32923
32924/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32925///
32926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32927#[inline]
32928#[target_feature(enable = "avx512f")]
32929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32930#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32931pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32932    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32933}
32934
32935/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32936///
32937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32938#[inline]
32939#[target_feature(enable = "avx512f")]
32940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32941#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32942pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32943    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32944}
32945
32946/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32947///
32948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32949#[inline]
32950#[target_feature(enable = "avx512f,avx512vl")]
32951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32952#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32953pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32954    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32955}
32956
32957/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32958///
32959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32960#[inline]
32961#[target_feature(enable = "avx512f,avx512vl")]
32962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32964pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32965    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32966}
32967
32968/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32969///
32970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32971#[inline]
32972#[target_feature(enable = "avx512f,avx512vl")]
32973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32974#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32975pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32976    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32977}
32978
32979/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32980///
32981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32982#[inline]
32983#[target_feature(enable = "avx512f,avx512vl")]
32984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32985#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32986pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32987    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32988}
32989
32990/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32991///
32992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
32993#[inline]
32994#[target_feature(enable = "avx512f")]
32995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32996#[rustc_legacy_const_generics(2)]
32997#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32998pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32999    unsafe {
33000        static_assert_uimm_bits!(IMM3, 3);
33001        let a = a.as_i64x8();
33002        let b = b.as_i64x8();
33003        let r = match IMM3 {
33004            0 => simd_eq(a, b),
33005            1 => simd_lt(a, b),
33006            2 => simd_le(a, b),
33007            3 => i64x8::ZERO,
33008            4 => simd_ne(a, b),
33009            5 => simd_ge(a, b),
33010            6 => simd_gt(a, b),
33011            _ => i64x8::splat(-1),
33012        };
33013        simd_bitmask(r)
33014    }
33015}
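
// Illustrative sketch (hypothetical, not part of this module's API): the
// 3-bit predicate of `_mm512_cmp_epi64_mask` reproduces the named comparison
// intrinsics above, e.g. `_MM_CMPINT_LE` behind `_mm512_cmple_epi64_mask` and
// `_MM_CMPINT_NLE` behind `_mm512_cmpgt_epi64_mask`. Assumes AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmp_predicates_match_named_forms() {
    let a = _mm512_set1_epi64(2);
    let b = _mm512_set1_epi64(3);
    // Predicate 2 (LE): same mask as the dedicated less-than-or-equal form.
    assert_eq!(
        _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b),
        _mm512_cmple_epi64_mask(a, b)
    );
    // Predicate 6 (NLE): same mask as the dedicated greater-than form.
    assert_eq!(
        _mm512_cmp_epi64_mask::<_MM_CMPINT_NLE>(a, b),
        _mm512_cmpgt_epi64_mask(a, b)
    );
}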

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
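
// Minimal sketch of the zeromask semantics above: `k1` is expanded to a lane
// mask and ANDed with the comparison, so bits cleared in `k1` are cleared in
// the result regardless of the comparison outcome. Hypothetical example,
// assuming AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_cmp_premask() {
    let a = _mm512_set1_epi64(1);
    let b = _mm512_set1_epi64(1);
    // All eight lanes compare equal, but only the lanes enabled in k1 survive.
    let k1: __mmask8 = 0b0000_1111;
    assert_eq!(_mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b), 0b0000_1111);
}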

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}
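
// Sketch of the masked-reduction identity used above: inactive lanes are
// replaced with 0, the identity of addition, so they cannot affect the sum.
// Hypothetical example, assuming AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_add_identity() {
    let a = _mm512_set1_epi32(5);
    // Only the low four lanes are active: 4 * 5 = 20.
    assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a), 20);
}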

/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
    }
}
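
// The reduction above halves the vector width at each step (512 -> 256 -> 128
// bits, then within the final lane pair), so it performs log2(16) = 4 rounds
// of pairwise additions instead of 15 sequential ones. A hypothetical scalar
// cross-check, assuming AVX-512F; the all-ones input makes the pairwise and
// sequential sums bit-identical.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_reduce_add_ps_matches_scalar() {
    let vals: [f32; 16] = [1.0; 16];
    let v = unsafe { _mm512_loadu_ps(vals.as_ptr()) };
    assert_eq!(_mm512_reduce_add_ps(v), vals.iter().sum::<f32>());
}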

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
}

/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i32x16(),
            _mm512_set1_epi32(1).as_i32x16(),
        ))
    }
}
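
// Unlike the additive reductions, the masked multiply replaces inactive lanes
// with 1 (the multiplicative identity) rather than 0, which would zero the
// whole product. Hypothetical example, assuming AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_mul_identity() {
    let a = _mm512_set1_epi32(3);
    // Two active lanes: 3 * 3 = 9; the 14 inactive lanes contribute factor 1.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0011, a), 9);
}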

/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i64x8(),
            _mm512_set1_epi64(1).as_i64x8(),
        ))
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
}

/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_max(a.as_i32x16()) }
}

/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
        ))
    }
}
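
// For a masked maximum the neutral element is the smallest representable
// value, so masked-off lanes can never win the comparison; the min reductions
// below use the largest value for the same reason. Hypothetical example,
// assuming AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_max_identity() {
    let a = _mm512_set1_epi32(-7);
    // One active lane holding -7: the result is -7, not the i32::MIN filler
    // used for the 15 inactive lanes.
    assert_eq!(_mm512_mask_reduce_max_epi32(0b1, a), -7);
}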

/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_max(a.as_i64x8()) }
}

/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
}

/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    unsafe { simd_reduce_max(a.as_u32x16()) }
}

/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
}

/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    unsafe { simd_reduce_max(a.as_u64x8()) }
}

/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    unsafe {
        let a = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
}

/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_min(a.as_i32x16()) }
}

/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
        ))
    }
}

/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_min(a.as_i64x8()) }
}

/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
}

/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    unsafe { simd_reduce_min(a.as_u32x16()) }
}

/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
        ))
    }
}

/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
    unsafe { simd_reduce_min(a.as_u64x8()) }
}

/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    unsafe {
        let a = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
}

/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_and(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
}

/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_and(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
}
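
// The masked AND reductions fill inactive lanes with -1 (all bits set), the
// identity of bitwise AND; the OR reductions below use 0 for the same reason.
// Hypothetical example, assuming AVX-512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_and_identity() {
    let a = _mm512_set1_epi64(0b1010);
    // One active lane: the result is that lane's value, untouched by the
    // all-ones filler in the other seven lanes.
    assert_eq!(_mm512_mask_reduce_and_epi64(0b1, a), 0b1010);
}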

/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_or(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}

/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_or(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}

/// Returns vector of type `__m512d` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
pub fn _mm512_undefined_pd() -> __m512d {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
pub fn _mm512_undefined_ps() -> __m512 {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
pub fn _mm512_undefined_epi32() -> __m512i {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with indeterminate elements.
33782/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33783/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33784/// In practice, this is typically equivalent to [`mem::zeroed`].
33785///
33786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33787#[inline]
33788#[target_feature(enable = "avx512f")]
33789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33790// This intrinsic has no corresponding instruction.
33791pub fn _mm512_undefined() -> __m512 {
33792    unsafe { const { mem::zeroed() } }
33793}
33794
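// Illustrative sketch (hypothetical helper, not part of this module): the
// "undefined" constructors are intended for values that will be overwritten
// before use, e.g. as the pass-through operand of a fully-masked operation.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _undefined_then_overwrite_example(mem_addr: *const i32) -> __m512i {
    // Every mask bit is set, so no element of the "undefined" source survives.
    _mm512_mask_loadu_epi32(_mm512_undefined_epi32(), 0xFFFF, mem_addr)
}
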
/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

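// Usage sketch (hypothetical helper): round-trip 16 packed 32-bit integers
// through a ZMM register with the unaligned load/store pair. No alignment
// requirement is placed on either array.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _copy16_i32_example(src: &[i32; 16], dst: &mut [i32; 16]) {
    let v = _mm512_loadu_epi32(src.as_ptr());
    _mm512_storeu_epi32(dst.as_mut_ptr(), v);
}
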
/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}

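// A short sketch of the masked down-converting store (hypothetical helper):
// truncate each active 32-bit lane to its low 16 bits and write only those
// lanes to memory; inactive destination elements are left untouched.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _store_low_halves_example(dst: *mut i16, a: __m512i) {
    // 0x00FF keeps the first eight of the sixteen 32-bit lanes.
    _mm512_mask_cvtepi32_storeu_epi16(dst, 0x00FF, a);
}
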
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
}

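// Saturation sketch (hypothetical helper): with signed saturation each
// active 32-bit lane is clamped to the i8 range [-128, 127] before the
// store, so e.g. 1000 is written as 127 and -1000 as -128.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _store_saturated_bytes_example(dst: *mut i8, a: __m512i) {
    _mm512_mask_cvtsepi32_storeu_epi8(dst, 0xFFFF, a);
}
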
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

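// Truncating-narrow sketch (hypothetical helper): write the low 32 bits of
// the first two 64-bit lanes of `a` to dst[0] and dst[1]; the remaining
// destination elements are not written.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _store_two_low_dwords_example(dst: *mut i32, a: __m512i) {
    _mm512_mask_cvtepi64_storeu_epi32(dst, 0b0000_0011, a);
}
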
/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read_unaligned(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
}

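// Pointer-type note as a sketch (hypothetical helper): the `si512` pair is
// typed on whole vectors, while the `epi32`/`epi64` pairs are typed on the
// element; all of them perform the same 64-byte unaligned access.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _copy_si512_example(src: *const __m512i, dst: *mut __m512i) {
    _mm512_storeu_si512(dst, _mm512_loadu_si512(src));
}
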
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
}

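// Alignment sketch (hypothetical wrapper type, not part of this module): the
// aligned load/store intrinsics require a 64-byte boundary, which safe Rust
// can guarantee with an over-aligned newtype.
#[cfg(test)]
#[repr(align(64))]
struct _Aligned64Example([i32; 16]);

#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _aligned_roundtrip_example(buf: &mut _Aligned64Example) {
    // `repr(align(64))` makes both the read and the write legal here.
    let v = _mm512_load_si512(buf.0.as_ptr().cast());
    _mm512_store_si512(buf.0.as_mut_ptr().cast(), v);
}
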
/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa32
pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovdqa64
pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}

/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovapd
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] // should be vmovapd
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
}

34748/// Load packed 32-bit integers from memory into dst using writemask k
34749/// (elements are copied from src when the corresponding mask bit is not set).
34750/// mem_addr does not need to be aligned on any particular boundary.
34751///
34752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
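///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs) of the
/// common "partial tail" pattern; lanes whose mask bit is clear are not read
/// from memory, so loading fewer than 16 elements is fine:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let tail = [7i32; 5]; // only 5 valid elements
///         let src = unsafe { _mm512_set1_epi32(-1) };
///         // Lanes 0..5 come from memory; the rest are copied from `src`.
///         let v = unsafe { _mm512_mask_loadu_epi32(src, 0b0000_0000_0001_1111, tail.as_ptr()) };
///     }
/// }
/// ```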
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
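///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs);
/// masked-off lanes are zeroed rather than taken from a `src` vector:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let data = [42i32; 3];
///         // Lanes 0..3 are loaded; lanes 3..16 become zero.
///         let v = unsafe { _mm512_maskz_loadu_epi32(0b111, data.as_ptr()) };
///     }
/// }
/// ```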
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
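///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs), blending
/// loaded floats over a fallback vector:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let data = [1.5f32; 8];
///         let fallback = unsafe { _mm512_setzero_ps() };
///         // Lower 8 lanes from memory; upper 8 lanes from `fallback`.
///         let v = unsafe { _mm512_mask_loadu_ps(fallback, 0x00FF, data.as_ptr()) };
///     }
/// }
/// ```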
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
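///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs); the
/// 256-bit masked forms need both `avx512f` and `avx512vl`, so both are checked
/// at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         let data = [3i32; 4];
///         let src = unsafe { _mm256_set1_epi32(9) };
///         // Lanes 0..4 from memory; lanes 4..8 from `src`.
///         let v = unsafe { _mm256_mask_loadu_epi32(src, 0b0000_1111, data.as_ptr()) };
///     }
/// }
/// ```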
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
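///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs); unlike
/// the `loadu` form, the pointer must be 64-byte aligned, provided here by a
/// hypothetical `#[repr(align(64))]` wrapper:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         #[repr(align(64))]
///         struct Aligned([i32; 16]);
///         let buf = Aligned([5; 16]);
///         let src = unsafe { _mm512_set1_epi32(0) };
///         let v = unsafe { _mm512_mask_load_epi32(src, 0xFFFF, buf.0.as_ptr()) };
///     }
/// }
/// ```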
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
}

/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
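///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs); only mask
/// bit 0 matters, and the pointer must be 16-byte aligned (a hypothetical
/// `#[repr(align(16))]` wrapper is used for that):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         #[repr(align(16))]
///         struct Aligned(f32);
///         let x = Aligned(2.5);
///         let src = _mm_set_ss(1.0);
///         // Bit 0 is set, so the lower lane is loaded from memory;
///         // the upper three lanes are zeroed either way.
///         let v = unsafe { _mm_mask_load_ss(src, 1, &x.0) };
///     }
/// }
/// ```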
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mut dst: __m128 = src;
    asm!(
        vpl!("vmovss {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mut dst: __m128;
    asm!(
        vpl!("vmovss {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mut dst: __m128d = src;
    asm!(
        vpl!("vmovsd {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mut dst: __m128d;
    asm!(
        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
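///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs) of the
/// partial-tail store pattern; lanes whose mask bit is clear are not written
/// at all, so memory past the buffer stays untouched:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         let mut out = [0i32; 5]; // room for only 5 elements
///         let v = unsafe { _mm512_set1_epi32(7) };
///         // Writes exactly 5 lanes; nothing past `out` is stored.
///         unsafe { _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0b0000_0000_0001_1111, v) };
///         assert_eq!(out, [7; 5]);
///     }
/// }
/// ```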
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
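///
/// # Example
///
/// A minimal sketch (an editorial illustration, not from Intel's docs); the
/// aligned form requires a 64-byte-aligned destination, provided by a
/// hypothetical `#[repr(align(64))]` wrapper:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         #[repr(align(64))]
///         struct Aligned([i32; 16]);
///         let mut out = Aligned([0; 16]);
///         let v = unsafe { _mm512_set1_epi32(1) };
///         // Store only the even lanes; odd lanes keep their old values.
///         unsafe { _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0b0101_0101_0101_0101, v) };
///     }
/// }
/// ```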
35644#[inline]
35645#[target_feature(enable = "avx512f")]
35646#[cfg_attr(test, assert_instr(vmovdqa32))]
35647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35648pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35649    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
35650    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
35651}
35652
35653/// Store packed 64-bit integers from a into memory using writemask k.
35654/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35655///
35656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35657#[inline]
35658#[target_feature(enable = "avx512f")]
35659#[cfg_attr(test, assert_instr(vmovdqa64))]
35660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35661pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35662    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
35663    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
35664}
35665
35666/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35667/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35668///
35669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35670#[inline]
35671#[target_feature(enable = "avx512f")]
35672#[cfg_attr(test, assert_instr(vmovaps))]
35673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35674pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35675    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
35676    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
35677}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
}

/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
    asm!(
        vps!("vmovss", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
    asm!(
        vps!("vmovsd", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
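
// Illustrative sketch (not part of the upstream sources): with mask bit 0
// clear, the scalar masked store is a no-op, so the destination keeps its
// previous value.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn maybe_store(out: *mut f64, k: __mmask8, v: __m128d) {
//         // Writes the low f64 of `v` to `*out` only when `k & 1 != 0`.
//         _mm_mask_store_sd(out, k, v);
//     }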

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}
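
// Illustrative sketch (not part of the upstream sources): an expand load
// reads `k.count_ones()` contiguous elements from memory and scatters them,
// in order, into the lanes selected by `k`.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn expand_two(mem: *const i32) -> __m512i {
//         // Reads mem[0] and mem[1], placing them in lanes 0 and 2 and
//         // zeroing every other lane.
//         _mm512_maskz_expandloadu_epi32(0b0101, mem)
//     }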

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_ps(
    src: __m512,
    k: __mmask16,
    mem_addr: *const f32,
) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_setr_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    unsafe {
        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
    }
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_set_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}
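
// Illustrative note (not part of the upstream sources): `_mm512_set_pd`
// takes its arguments highest-element-first, while `_mm512_setr_pd` takes
// them in memory (lowest-first) order, so these two calls build the same
// vector:
//
//     let a = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
//     let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);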

/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_ss&expand=3832)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovss))]
pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mov: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_ss&expand=3833)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovss))]
pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut mov: f32 = 0.;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_sd&expand=3829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mov: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_sd&expand=3830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut mov: f64 = 0.;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}
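
// Illustrative note (not part of the upstream sources): only bit 0 of the
// mask matters for the scalar moves; the upper lanes always come from `a`.
//
//     let r = _mm_mask_move_ss(src, 0, a, b);
//     // Lane 0 of `r` is lane 0 of `src`; lanes 1..=3 are copied from `a`.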

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ss&expand=159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss))]
pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut add: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ss&expand=160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss))]
pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut add: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_sd&expand=155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut add: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_sd&expand=156)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut add: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}
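
// Illustrative note (not part of the upstream sources): the masked scalar
// adds only touch lane 0, which makes them useful for conditional
// accumulation.
//
//     let acc2 = _mm_mask_add_ss(acc, k, acc, x);
//     // Lane 0 becomes acc0 + x0 when k & 1 != 0, else it stays acc0;
//     // lanes 1..=3 are copied from `acc`.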

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ss&expand=5750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss))]
pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ss&expand=5751)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss))]
pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_sd&expand=5746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_sd&expand=5747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ss&expand=3950)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss))]
pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ss&expand=3951)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss))]
pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_sd&expand=3947)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_sd&expand=3948)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ss&expand=2181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss))]
pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ss&expand=2182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss))]
pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_sd&expand=2178)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_sd&expand=2179)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ss&expand=3672)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ss&expand=3673)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_sd&expand=3669)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_sd&expand=3670)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ss&expand=3786)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminss))]
pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ss&expand=3787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminss))]
pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_sd&expand=3783)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminsd))]
pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_sd&expand=3784)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminsd))]
pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
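
// Note (not from the upstream sources): these compile to the x86
// VMAXSS/VMINSS family, which returns the second operand (`b`) when either
// input is NaN or when both inputs are zero (of either sign), so they are
// not the IEEE-754 minNum/maxNum operations.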

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ss&expand=5387)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ss&expand=5388)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_sd&expand=5384)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_sd&expand=5385)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
}
36750
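// Illustrative sketch (hypothetical helper, not part of the upstream API):
// the masked square roots operate on the lower element of `b` only, while
// the upper lanes always come from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_mask_sqrt_ss() {
    let a = _mm_set_ss(7.0);
    let b = _mm_set_ss(16.0);
    let src = _mm_set_ss(-1.0);
    // Mask bit 0 set: lower lane = sqrt(16.0) = 4.0.
    let _r = _mm_mask_sqrt_ss(src, 0b1, a, b);
    // Mask bit 0 clear: lower lane copied from `src` (writemask) or zeroed (zeromask).
    let _s = _mm_mask_sqrt_ss(src, 0b0, a, b);
    let _z = _mm_maskz_sqrt_ss(0b0, a, b);
}
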
/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}

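// Illustrative sketch (hypothetical helper, not part of the upstream API):
// rsqrt14 yields an approximation of 1/sqrt(x), so the result is only
// guaranteed to lie within 2^-14 relative error of the exact value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_rsqrt14_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(4.0);
    // Lower lane is approximately 1/sqrt(4.0) = 0.5, but not necessarily exactly 0.5.
    let _approx = _mm_rsqrt14_sd(a, b);
}
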
/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}

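// Illustrative sketch (hypothetical helper, not part of the upstream API):
// rcp14 approximates 1/x with the same 2^-14 error bound as rsqrt14, which
// is why exact comparisons against the true reciprocal are inappropriate.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_rcp14_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(8.0);
    // Lower lane is approximately 1/8.0 = 0.125 (within 2^-14 relative error).
    let _approx = _mm_rcp14_sd(a, b);
}
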
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

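// Illustrative sketch (hypothetical helper, not part of the upstream API):
// getexp returns the unbiased exponent as a float, i.e. floor(log2(|x|)),
// so 8.0 maps to 3.0 and 0.5 would map to -1.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_getexp_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(8.0);
    // Lower lane = floor(log2(8.0)) = 3.0; upper lane copied from `a`.
    let _exp = _mm_getexp_sd(a, b);
}
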
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

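// Illustrative sketch (hypothetical helper, not part of the upstream API):
// NORM and SIGN are packed into the instruction immediate as
// `SIGN << 2 | NORM`. Assuming this crate's `_MM_MANT_NORM_1_2` and
// `_MM_MANT_SIGN_ZERO` constants, 12.0 = 1.5 * 2^3 normalizes to 1.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_getmant_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(12.0);
    // Lower lane = 1.5: mantissa forced into [1, 2) with the sign cleared.
    let _mant = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a, b);
}
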
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

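// Illustrative sketch (hypothetical helper, not part of the upstream API):
// bits [7:4] of IMM8 select M, the number of fraction bits to keep (the
// value is rounded to a multiple of 2^-M), and the low bits select the
// rounding mode, so M = 1 with truncation maps 2.75 down to 2.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_roundscale_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(2.75);
    // IMM8 = (1 << 4) | _MM_FROUND_TO_ZERO: lower lane = trunc(2.75 * 2) / 2 = 2.5.
    let _r = _mm_roundscale_sd::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a, b);
}
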
/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
    unsafe {
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
    }
}

/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the lower double-precision (64-bit) floating-point element in a using the lower element of b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

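// Illustrative sketch (hypothetical helper, not part of the upstream API):
// scalef computes a * 2^floor(b) on the lower lane, which is the scalar
// building block for ldexp-style scaling.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_scalef_sd() {
    let a = _mm_set_sd(3.0);
    let b = _mm_set_sd(2.0);
    // Lower lane = 3.0 * 2^floor(2.0) = 12.0.
    let _scaled = _mm_scalef_sd(a, b);
}
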
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
        }
        simd_insert!(c, 0, fmadd)
    }
}

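// Illustrative sketch (hypothetical helper, not part of the upstream API):
// the three masked forms compute the same fused multiply-add and differ only
// in the lower-lane fallback when mask bit 0 is clear (`a`, zero, or `c`)
// and in which operand supplies the upper lane (`a`, or `c` for mask3).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_fmadd_sd_masks() {
    let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = fma(2.0, 3.0, 4.0) = 10.0 for all three forms.
    let _computed = _mm_mask_fmadd_sd(a, 0b1, b, c);
    // Mask bit 0 clear: lower lane = 2.0 (from a), 0.0, and 4.0 (from c) respectively.
    let _keep_a = _mm_mask_fmadd_sd(a, 0b0, b, c);
    let _zeroed = _mm_maskz_fmadd_sd(0b0, a, b, c);
    let _keep_c = _mm_mask3_fmadd_sd(a, b, c, 0b0);
}
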
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fmsub)
    }
}

37758/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37759///
37760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
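///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(10.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///             let r = _mm_mask_fnmadd_ss(a, 0b1, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 4.0);
///             // Mask bit 0 clear: lower lane is copied from `a`.
///             let r = _mm_mask_fnmadd_ss(a, 0b0, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```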
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
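///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(10.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///             let r = _mm_maskz_fnmadd_sd(0b1, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 4.0);
///             // Mask bit 0 clear: lower lane is zeroed out.
///             let r = _mm_maskz_fnmadd_sd(0b0, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 0.0);
///         }
///     }
/// }
/// ```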
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
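///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(1.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) - 1.0 = -7.0.
///             let r = _mm_mask_fnmsub_ss(a, 0b1, b, c);
///             assert_eq!(_mm_cvtss_f32(r), -7.0);
///             // Mask bit 0 clear: lower lane is copied from `a`.
///             let r = _mm_mask_fnmsub_ss(a, 0b0, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```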
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmsub;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmsub;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
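///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_ss(1.5);
///             let b = _mm_set_ss(2.25);
///             // Round to nearest, exceptions suppressed: 1.5 + 2.25 = 3.75.
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let r = _mm_add_round_ss::<R>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.75);
///         }
///     }
/// }
/// ```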
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
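///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let src = _mm_set_ss(42.0);
///             let a = _mm_set_ss(1.5);
///             let b = _mm_set_ss(2.25);
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             // Mask bit 0 clear: lower lane is copied from `src`.
///             let r = _mm_mask_add_round_ss::<R>(src, 0b0, a, b);
///             assert_eq!(_mm_cvtss_f32(r), 42.0);
///             // Mask bit 0 set: lower lane = 1.5 + 2.25 = 3.75.
///             let r = _mm_mask_add_round_ss::<R>(src, 0b1, a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.75);
///         }
///     }
/// }
/// ```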
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
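///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_ss(5.0);
///             let b = _mm_set_ss(3.0);
///             // Lower lane: 5.0 - 3.0 = 2.0.
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let r = _mm_sub_round_ss::<R>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```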
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
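///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_sd(1.5);
///             let b = _mm_set_sd(4.0);
///             // Lower lane: 1.5 * 4.0 = 6.0.
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let r = _mm_mul_round_sd::<R>(a, b);
///             assert_eq!(_mm_cvtsd_f64(r), 6.0);
///         }
///     }
/// }
/// ```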
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
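///
/// # Examples
///
/// A minimal usage sketch (illustrative values; not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set_ss(6.0);
///             let b = _mm_set_ss(3.0);
///             // Lower lane: 6.0 / 3.0 = 2.0 (exact, so the rounding mode has no effect here).
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let r = _mm_div_round_ss::<R>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```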
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vdivss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vdivsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_max_round_ss<const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vmaxss(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38723#[inline]
38724#[target_feature(enable = "avx512f")]
38725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38726#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38727#[rustc_legacy_const_generics(2)]
38728pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38729    unsafe {
38730        static_assert_sae!(SAE);
38731        let a = a.as_f64x2();
38732        let b = b.as_f64x2();
38733        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38734        transmute(r)
38735    }
38736}
38737
38738/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38739/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38740///
38741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38742#[inline]
38743#[target_feature(enable = "avx512f")]
38744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38745#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38746#[rustc_legacy_const_generics(4)]
38747pub fn _mm_mask_max_round_sd<const SAE: i32>(
38748    src: __m128d,
38749    k: __mmask8,
38750    a: __m128d,
38751    b: __m128d,
38752) -> __m128d {
38753    unsafe {
38754        static_assert_sae!(SAE);
38755        let a = a.as_f64x2();
38756        let b = b.as_f64x2();
38757        let src = src.as_f64x2();
38758        let r = vmaxsd(a, b, src, k, SAE);
38759        transmute(r)
38760    }
38761}
38762
38763/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38764/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38765///
38766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
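///
/// # Example
///
/// A sketch of the zeromask behaviour (assumes x86-64 and runtime `avx512f`
/// support): with mask bit 0 clear, the lower lane is zeroed rather than
/// copied from a source operand.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_pd(2.0, 1.0);
///     let b = _mm_set_pd(4.0, 3.0);
///     // k = 0b0: the lower lane is zeroed; the upper lane still comes from `a`.
///     let r = unsafe { _mm_maskz_max_round_sd::<_MM_FROUND_NO_EXC>(0b0, a, b) };
///     assert_eq!(_mm_cvtsd_f64(r), 0.0);
/// }
/// # }
/// ```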
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_min_round_ss<const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vminss(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vminss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
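///
/// # Example
///
/// A usage sketch (illustrative; assumes x86-64 with runtime `avx512f` support):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_pd(9.0, 7.0); // lower lane = 7.0
///     let b = _mm_set_pd(1.0, 3.0); // lower lane = 3.0
///     // Lower lane becomes min(7.0, 3.0); the upper lane is copied from `a`.
///     let r = unsafe { _mm_min_round_sd::<_MM_FROUND_NO_EXC>(a, b) };
///     assert_eq!(_mm_cvtsd_f64(r), 3.0);
/// }
/// # }
/// ```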
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_min_round_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vminsd(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
    }
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
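///
/// # Example
///
/// A usage sketch with an explicit rounding mode (assumes x86-64 with runtime
/// `avx512f` support); compound rounding constants need braces as a const
/// generic argument:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     let b = _mm_set_ss(9.0);
///     // Lower lane becomes sqrt(9.0); the upper three lanes come from `a`.
///     let r = unsafe {
///         _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
///     };
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// # }
/// ```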
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
    }
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, src, k, ROUNDING)
    }
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
    }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
    }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, src, k, ROUNDING)
    }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
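///
/// # Example
///
/// A usage sketch (assumes x86-64 with runtime `avx512f` support) showing the
/// floor(log2(x)) behaviour:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_setzero_ps();
///     let b = _mm_set_ss(8.5);
///     // floor(log2(8.5)) = 3, returned as 3.0 in the lower lane.
///     let r = unsafe { _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b) };
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// # }
/// ```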
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetexpss(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetexpsd(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
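///
/// # Example
///
/// A usage sketch (assumes x86-64 with runtime `avx512f` support). Note that
/// the Rust constants are spelled in upper case (`_MM_MANT_SIGN_SRC`, etc.):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_setzero_ps();
///     let b = _mm_set_ss(24.0);
///     // 24.0 = 1.5 * 2^4, so the mantissa normalized to [1, 2) is 1.5.
///     let r = unsafe {
///         _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a, b)
///     };
///     assert_eq!(_mm_cvtss_f32(r), 1.5);
/// }
/// # }
/// ```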
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm_getmant_round_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(4, 5, 6)]
pub fn _mm_mask_getmant_round_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm_maskz_getmant_round_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm_getmant_round_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(4, 5, 6)]
pub fn _mm_mask_getmant_round_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm_maskz_getmant_round_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
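///
/// # Example
///
/// A usage sketch (assumes x86-64 with runtime `avx512f` support). `IMM8 = 0`
/// requests zero fraction bits with round-to-nearest-even, so 2.5 rounds to 2.0:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_setzero_ps();
///     let b = _mm_set_ss(2.5);
///     let r = unsafe { _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b) };
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// # }
/// ```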
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vrndscaless(a, b, src, k, IMM8, SAE);
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
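///
/// # Example
///
/// A usage sketch (assumes x86-64 with runtime `avx512f` support); the lower
/// lane computes `a * 2^floor(b)`:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let a = _mm_set_ss(3.0);
///     let b = _mm_set_ss(2.0);
///     // Lower lane: 3.0 * 2^2 = 12.0.
///     let r = unsafe {
///         _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
///     };
///     assert_eq!(_mm_cvtss_f32(r), 12.0);
/// }
/// # }
/// ```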
39596#[inline]
39597#[target_feature(enable = "avx512f")]
39598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39599#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39600#[rustc_legacy_const_generics(2)]
39601pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39602    unsafe {
39603        static_assert_rounding!(ROUNDING);
39604        let a = a.as_f32x4();
39605        let b = b.as_f32x4();
39606        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39607        transmute(r)
39608    }
39609}
39610
39611/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39612///
39613/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39614/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39615/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39616/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39617/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39618/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39619///
39620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39621#[inline]
39622#[target_feature(enable = "avx512f")]
39623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39624#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39625#[rustc_legacy_const_generics(4)]
39626pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39627    src: __m128,
39628    k: __mmask8,
39629    a: __m128,
39630    b: __m128,
39631) -> __m128 {
39632    unsafe {
39633        static_assert_rounding!(ROUNDING);
39634        let a = a.as_f32x4();
39635        let b = b.as_f32x4();
39636        let src = src.as_f32x4();
39637        let r = vscalefss(a, b, src, k, ROUNDING);
39638        transmute(r)
39639    }
39640}
39641
39642/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39643///
39644/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39645/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39646/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39647/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39648/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39649/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39650///
39651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39652#[inline]
39653#[target_feature(enable = "avx512f")]
39654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39655#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39656#[rustc_legacy_const_generics(3)]
39657pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39658    unsafe {
39659        static_assert_rounding!(ROUNDING);
39660        let a = a.as_f32x4();
39661        let b = b.as_f32x4();
39662        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39663        transmute(r)
39664    }
39665}
39666
39667/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39668///
39669/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39670/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39671/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39672/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39673/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39674/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39675///
39676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39677#[inline]
39678#[target_feature(enable = "avx512f")]
39679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39680#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39681#[rustc_legacy_const_generics(2)]
39682pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39683    unsafe {
39684        static_assert_rounding!(ROUNDING);
39685        let a = a.as_f64x2();
39686        let b = b.as_f64x2();
39687        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39688        transmute(r)
39689    }
39690}
39691
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
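///
/// # Example
///
/// A minimal sketch of the fused multiply-add on the lower lane
/// (illustrative only; assumes AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Lower lane: 2.0 * 3.0 + 4.0 = 10.0; upper lanes copied from `a`.
///     let r = _mm_fmadd_round_ss::<R>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
/// }
/// ```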
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
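///
/// # Example
///
/// A minimal sketch of the writemask behaviour (illustrative only; assumes
/// AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Mask bit 0 set: lower lane is 2.0 * 3.0 + 4.0 = 10.0.
///     let r = _mm_mask_fmadd_round_ss::<R>(a, 0b1, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
///     // Mask bit 0 clear: lower lane is copied from `a`.
///     let r = _mm_mask_fmadd_round_ss::<R>(a, 0b0, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// ```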
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
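///
/// # Example
///
/// A minimal sketch of the zeromask behaviour (illustrative only; assumes
/// AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Mask bit 0 clear: the lower lane is zeroed instead of computed.
///     let r = _mm_maskz_fmadd_round_ss::<R>(0b0, a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```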
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
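///
/// # Example
///
/// A minimal sketch of the `mask3` variant, where the destination lanes come
/// from `c` (illustrative only; assumes AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Mask bit 0 clear: the lower lane is copied from `c`, not computed.
///     let r = _mm_mask3_fmadd_round_ss::<R>(a, b, c, 0b0);
///     assert_eq!(_mm_cvtss_f32(r), 4.0);
/// }
/// ```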
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
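///
/// # Example
///
/// A minimal sketch of the fused multiply-subtract on the lower lane
/// (illustrative only; assumes AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Lower lane: 2.0 * 3.0 - 4.0 = 2.0.
///     let r = _mm_fmsub_round_ss::<R>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// ```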
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
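///
/// # Example
///
/// A minimal sketch of the negated fused multiply-add on the lower lane
/// (illustrative only; assumes AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Lower lane: -(2.0 * 3.0) + 4.0 = -2.0.
///     let r = _mm_fnmadd_round_ss::<R>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -2.0);
/// }
/// ```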
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
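///
/// # Example
///
/// A minimal sketch of the negated fused multiply-subtract on the lower lane
/// (illustrative only; assumes AVX-512F is available at runtime):
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     // Lower lane: -(2.0 * 3.0) - 4.0 = -10.0.
///     let r = _mm_fnmsub_round_ss::<R>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -10.0);
/// }
/// ```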
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmsub;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmsub)
    }
}

40685/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40686///
40687/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40688/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40689/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40690/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40691/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40692/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40693///
40694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40695#[inline]
40696#[target_feature(enable = "avx512f")]
40697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40698#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40699#[rustc_legacy_const_generics(4)]
40700pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40701    a: __m128d,
40702    k: __mmask8,
40703    b: __m128d,
40704    c: __m128d,
40705) -> __m128d {
40706    unsafe {
40707        static_assert_rounding!(ROUNDING);
40708        let mut fnmsub: f64 = simd_extract!(a, 0);
40709        if (k & 0b00000001) != 0 {
40710            let extracta = -fnmsub;
40711            let extractb: f64 = simd_extract!(b, 0);
40712            let extractc: f64 = simd_extract!(c, 0);
40713            let extractc = -extractc;
40714            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40715        }
40716        simd_insert!(a, 0, fnmsub)
40717    }
40718}
40719
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
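///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of the zeromask
/// behavior, assuming an `avx512f`-capable CPU:
///
/// ```ignore
/// const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// let a = _mm_set_sd(2.0);
/// let b = _mm_set_sd(3.0);
/// let c = _mm_set_sd(1.0);
/// // Mask bit 0 set: lower lane is -(2.0 * 3.0) - 1.0 = -7.0.
/// assert_eq!(_mm_cvtsd_f64(_mm_maskz_fnmsub_round_sd::<R>(0b1, a, b, c)), -7.0);
/// // Mask bit 0 clear: lower lane is zeroed.
/// assert_eq!(_mm_cvtsd_f64(_mm_maskz_fnmsub_round_sd::<R>(0b0, a, b, c)), 0.0);
/// ```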
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
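///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) that uses the token
/// table in c to replace a zero input in b with +infinity, assuming an
/// `avx512f`-capable CPU. Per the SDM, nibble 2 of the table is the response
/// for the ZERO input token, and response 0b0101 selects +INF; all other
/// nibbles are 0, which preserves the corresponding element of a:
///
/// ```ignore
/// let a = _mm_set_ss(33.0);
/// let b = _mm_set_ss(0.0);
/// // Response nibble 2 (ZERO input token) = 0b0101 (+INF).
/// let c = _mm_set1_epi32(0b0101 << 8);
/// let r = _mm_fixupimm_ss::<0>(a, b, c);
/// assert_eq!(_mm_cvtss_f32(r), f32::INFINITY);
/// ```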
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f32 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f32 = simd_extract!(fixupimm, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f32 = simd_extract!(fixupimm, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f64 = simd_extract!(fixupimm, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f64 = simd_extract!(fixupimm, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        let fixupimm: f64 = simd_extract!(fixupimm, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
        let fixupimm: f32 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
        let fixupimm: f32 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
        let fixupimm: f32 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
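///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation), assuming an
/// `avx512f`-capable CPU: the same zero-to-infinity fixup as the `ss`
/// variant, with `_MM_FROUND_NO_EXC` passed as the sae parameter to suppress
/// exception reporting:
///
/// ```ignore
/// let a = _mm_set_sd(33.0);
/// let b = _mm_set_sd(0.0);
/// // Response nibble 2 (ZERO input token) = 0b0101 (+INF).
/// let c = _mm_set1_epi64x(0b0101 << 8);
/// let r = _mm_fixupimm_round_sd::<0, { _MM_FROUND_NO_EXC }>(a, b, c);
/// assert_eq!(_mm_cvtsd_f64(r), f64::INFINITY);
/// ```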
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
        let fixupimm: f64 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
        let fixupimm: f64 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let c = c.as_i64x2();
        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
        let fixupimm: f64 = simd_extract!(r, 0);
        let r = simd_insert!(a, 0, fixupimm);
        transmute(r)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
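///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation) of the writemask
/// behavior, assuming an `avx512f`-capable CPU:
///
/// ```ignore
/// let src = _mm_set_sd(9.0);
/// let a = _mm_set_sd(5.0);
/// let b = _mm_set_ss(2.5);
/// // Mask bit 0 set: lower lane is b's lower f32 widened to f64.
/// assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b1, a, b)), 2.5);
/// // Mask bit 0 clear: lower lane is copied from src.
/// assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b0, a, b)), 9.0);
/// ```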
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    unsafe {
        transmute(vcvtss2sd(
            a.as_f64x2(),
            b.as_f32x4(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    unsafe {
        transmute(vcvtss2sd(
            a.as_f64x2(),
            b.as_f32x4(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    unsafe {
        transmute(vcvtsd2ss(
            a.as_f32x4(),
            b.as_f64x2(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    unsafe {
        transmute(vcvtsd2ss(
            a.as_f32x4(),
            b.as_f64x2(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f32x4();
        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128,
) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f32x4();
        let src = src.as_f64x2();
        let r = vcvtss2sd(a, b, src, k, SAE);
        transmute(r)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f32x4();
        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
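///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation), assuming an
/// `avx512f`-capable CPU. 1.0/3.0 has no exact f32 representation, so
/// rounding the narrowing conversion up or down yields adjacent f32 values:
///
/// ```ignore
/// let a = _mm_setzero_ps();
/// let b = _mm_set_sd(1.0 / 3.0);
/// let up = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// let down = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// assert!(_mm_cvtss_f32(up) > _mm_cvtss_f32(down));
/// ```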
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f64x2();
        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128d,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f64x2();
        let src = src.as_f32x4();
        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f64x2();
        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
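///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation), assuming an
/// `avx512f`-capable CPU. Round-to-nearest breaks the 2.5 tie to the even
/// integer 2, while rounding toward +infinity gives 3:
///
/// ```ignore
/// let a = _mm_set_ss(2.5);
/// let nearest = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// let up = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// assert_eq!((nearest, up), (2, 3));
/// ```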
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        vcvtss2si(a, ROUNDING)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        vcvtss2si(a, ROUNDING)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        vcvtss2usi(a, ROUNDING)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub fn _mm_cvtss_i32(a: __m128) -> i32 {
    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub fn _mm_cvtss_u32(a: __m128) -> u32 {
    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        vcvtsd2si(a, ROUNDING)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        vcvtsd2si(a, ROUNDING)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        vcvtsd2usi(a, ROUNDING)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
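///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation), assuming an
/// `avx512f`-capable CPU. 2^24 + 1 is the smallest positive integer with no
/// exact f32 representation, so the rounding mode decides which neighbor is
/// produced:
///
/// ```ignore
/// let a = _mm_setzero_ps();
/// let down = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, 16_777_217);
/// let up = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, 16_777_217);
/// assert_eq!(_mm_cvtss_f32(down), 16_777_216.0);
/// assert_eq!(_mm_cvtss_f32(up), 16_777_218.0);
/// ```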
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtusi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
    unsafe {
        let b = b as f32;
        simd_insert!(a, 0, b)
    }
}

/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe {
        let b = b as f64;
        simd_insert!(a, 0, b)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
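///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation), assuming an
/// `avx512f`-capable CPU. Truncation always drops the fractional part,
/// rounding toward zero regardless of `MXCSR.RC`:
///
/// ```ignore
/// assert_eq!(_mm_cvtt_roundss_si32::<{ _MM_FROUND_NO_EXC }>(_mm_set_ss(-1.7)), -1);
/// assert_eq!(_mm_cvtt_roundss_si32::<{ _MM_FROUND_NO_EXC }>(_mm_set_ss(2.9)), 2);
/// ```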
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2si(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2si(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2usi(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si))]
pub fn _mm_cvttss_i32(a: __m128) -> i32 {
    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2usi))]
pub fn _mm_cvttss_u32(a: __m128) -> u32 {
    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2usi(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si))]
pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41744#[inline]
41745#[target_feature(enable = "avx512f")]
41746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41747#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41748pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41749    unsafe {
41750        let b = b as f32;
41751        simd_insert!(a, 0, b)
41752    }
41753}
41754
41755/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41756///
41757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41758#[inline]
41759#[target_feature(enable = "avx512f")]
41760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41761#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41762pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41763    unsafe {
41764        let b = b as f64;
41765        simd_insert!(a, 0, b)
41766    }
41767}
41768
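// Usage sketch (illustrative, with a hypothetical helper name) for the two
// unsigned-insert conversions above: only lane 0 of the destination is
// replaced, the upper lanes pass through from a, and values above i32::MAX
// (which the plain signed cvtsi variants cannot take) convert correctly:
//
//     #[target_feature(enable = "avx512f")]
//     fn insert_u32(a: __m128, b: __m128d) -> (__m128, __m128d) {
//         (_mm_cvtu32_ss(a, 4_000_000_000), _mm_cvtu32_sd(b, 4_000_000_000))
//     }
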
41769/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by `IMM5`, and return the boolean result (0 or 1).\
41770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41771///
41772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41773#[inline]
41774#[target_feature(enable = "avx512f")]
41775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
41777#[rustc_legacy_const_generics(2, 3)]
41778pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41779    unsafe {
41780        static_assert_uimm_bits!(IMM5, 5);
41781        static_assert_mantissas_sae!(SAE);
41782        let a = a.as_f32x4();
41783        let b = b.as_f32x4();
41784        vcomiss(a, b, IMM5, SAE)
41785    }
41786}
41787
41788/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by `IMM5`, and return the boolean result (0 or 1).\
41789/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41790///
41791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41792#[inline]
41793#[target_feature(enable = "avx512f")]
41794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41795#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
41796#[rustc_legacy_const_generics(2, 3)]
41797pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41798    unsafe {
41799        static_assert_uimm_bits!(IMM5, 5);
41800        static_assert_mantissas_sae!(SAE);
41801        let a = a.as_f64x2();
41802        let b = b.as_f64x2();
41803        vcomisd(a, b, IMM5, SAE)
41804    }
41805}
41806
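// Usage sketch (illustrative, hypothetical helper): IMM5 takes the same
// _CMP_* predicate encoding (0..=31) used by _mm_cmp_ss; the call returns 1
// when the predicate holds for the lower elements and 0 otherwise:
//
//     #[target_feature(enable = "avx512f")]
//     fn lower_lt(a: __m128, b: __m128) -> bool {
//         _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b) == 1
//     }
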
41807/// Equal
41808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41809pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41810/// Less-than
41811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41812pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41813/// Less-than-or-equal
41814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41815pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41816/// False
41817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41818pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41819/// Not-equal
41820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41821pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41822/// Not less-than
41823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41824pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41825/// Not less-than-or-equal
41826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41827pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41828/// True
41829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41830pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
41831
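// Usage sketch (illustrative, hypothetical helper): these three-bit
// predicates parameterize the _mm512_cmp_ep{i,u}{32,64}_mask family; for
// example, a per-lane unsigned less-than producing one mask bit per element:
//
//     #[target_feature(enable = "avx512f")]
//     fn lanes_lt(a: __m512i, b: __m512i) -> __mmask16 {
//         _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b)
//     }
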
41832/// Interval [1, 2)
41833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41834pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41835/// Interval [0.5, 2)
41836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41837pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41838/// Interval [0.5, 1)
41839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41840pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41841/// Interval [0.75, 1.5)
41842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41843pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41844
41845/// sign = sign(SRC)
41846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41847pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41848/// sign = 0
41849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41850pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41851/// DEST = NaN if sign(SRC) = 1
41852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41853pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
41854
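// Usage sketch (illustrative, hypothetical helper): a norm constant and a
// sign constant together parameterize the getmant family. This extracts
// every mantissa normalized into [1, 2), keeping the source sign:
//
//     #[target_feature(enable = "avx512f")]
//     fn mantissas(a: __m512) -> __m512 {
//         _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a)
//     }
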
41855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41856pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41858pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41860pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41862pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41864pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41866pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41868pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41870pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41872pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41874pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41876pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41878pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41880pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41882pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41884pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41886pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41888pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41890pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41892pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41894pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41896pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41898pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41900pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41902pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41904pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41906pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41908pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41910pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41912pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41914pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41916pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41918pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41920pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41922pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41924pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41926pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41928pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41930pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41932pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41934pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41936pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41938pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41940pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41942pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41944pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41946pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41948pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41950pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41952pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41954pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41956pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41958pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41960pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41962pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41964pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41966pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41968pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41970pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41972pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41974pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41976pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41978pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41980pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41982pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41984pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41986pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41988pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41990pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41992pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41994pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41996pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41998pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42000pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
42001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42002pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
42003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42004pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
42005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42006pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
42007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42008pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
42009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42010pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
42011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42012pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
42013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42014pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
42015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42016pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
42017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42018pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
42019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42020pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
42021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42022pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
42023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42024pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
42025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42026pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
42027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42028pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
42029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42030pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
42031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42032pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
42033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42034pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
42035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42036pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
42037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42038pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
42039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42040pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
42041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42042pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
42043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42044pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
42045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42046pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
42047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42048pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
42049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42050pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
42051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42052pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
42053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42054pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
42055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42056pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
42057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42058pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
42059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42060pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
42061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42062pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
42063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42064pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
42065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42066pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
42067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42068pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
42069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42070pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
42071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42072pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
42073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42074pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
42075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42076pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
42077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42078pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42080pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42082pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42084pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42086pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42088pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42090pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42092pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42094pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42096pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42098pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42100pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42102pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42104pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42106pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42108pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42110pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42112pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42114pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42116pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42118pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42120pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42122pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42124pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42126pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42128pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42130pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42132pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42134pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42136pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42138pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42140pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42142pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42144pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42146pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42148pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42150pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42152pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42154pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42156pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42158pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42160pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42162pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42164pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42166pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42168pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42170pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42172pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42174pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42176pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42178pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42180pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42182pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42184pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42186pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42188pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42190pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42192pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42194pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42196pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42198pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42200pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42202pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42204pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42206pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42208pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42210pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42212pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42214pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42216pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42218pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42220pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42222pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42224pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42226pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42228pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42230pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42232pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42234pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42236pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42238pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42240pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42242pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42244pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42246pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42248pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42250pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42252pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42254pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42256pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42258pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42260pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42262pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42264pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42266pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42268pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42270pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42272pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42274pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42276pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42278pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42280pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42282pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42284pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42286pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42288pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42290pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42292pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42294pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42296pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42298pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42300pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42302pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42304pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42306pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42308pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42310pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42312pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42314pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42316pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42318pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42320pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42322pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42324pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42326pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42328pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42330pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42332pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42334pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42336pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42338pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42340pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42342pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42344pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42346pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42348pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42350pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42352pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42354pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42356pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42358pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42360pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42362pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42364pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42366pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
42367
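// Usage sketch (illustrative, hypothetical helper): each letter of a
// _MM_PERM_* name selects a source dword (A = 0 through D = 3) within every
// 128-bit lane, with the first letter filling lane 3 and the last filling
// lane 0; hence _MM_PERM_DCBA (0xE4) is the identity and _MM_PERM_ABCD
// (0x1B) reverses the dwords of each 128-bit chunk:
//
//     #[target_feature(enable = "avx512f")]
//     fn reverse_dwords(a: __m512i) -> __m512i {
//         _mm512_shuffle_epi32::<_MM_PERM_ABCD>(a)
//     }
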
42368#[allow(improper_ctypes)]
42369unsafe extern "C" {
42370    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42371    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42372    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42373    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42374
42375    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42376    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42377    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42378    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42379
42380    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42381    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
42382    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42383    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang
42384
42385    #[link_name = "llvm.x86.avx512.add.ps.512"]
42386    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42387    #[link_name = "llvm.x86.avx512.add.pd.512"]
42388    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42389    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42390    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42391    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42392    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42393    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42394    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42395    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42396    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42397    #[link_name = "llvm.x86.avx512.div.ps.512"]
42398    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42399    #[link_name = "llvm.x86.avx512.div.pd.512"]
42400    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42401
42402    #[link_name = "llvm.x86.avx512.max.ps.512"]
42403    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42404    #[link_name = "llvm.x86.avx512.max.pd.512"]
42405    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42406    #[link_name = "llvm.x86.avx512.min.ps.512"]
42407    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42408    #[link_name = "llvm.x86.avx512.min.pd.512"]
42409    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42410
42411    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42412    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42413
42414    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42415    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42416    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42417    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42418
42419    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42420    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42421    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42422    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42423    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42424    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42425
42426    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42427    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42428    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42429    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42430    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42431    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42432
42433    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42434    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42435    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42436    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42437    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42438    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42439
42440    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42441    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42442    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42443    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42444    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42445    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42446
42447    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42448    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42449    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42450    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42451    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42452    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42453
42454    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42455    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42456    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42457    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42458    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42459    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42460
42461    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42462    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42463    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42464    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42465    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42466    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42467
42468    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42469    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42470    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42471    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42472    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42473    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42474
42475    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42476    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42477    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42478    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42479    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42480    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42481
42482    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42483    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42484    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42485    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42486    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42487    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42488
42489    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42490    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42491    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42492    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42493    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42494    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42495
42496    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42497    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42498    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42499    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42500    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42501    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42502
42503    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42504    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42505    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42506    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42507    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42508    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42509
42510    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42511    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42512    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42513    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42514    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42515    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42516
42517    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42518    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42519    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42520    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42521    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42522    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42523
42524    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42525    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42526    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42527    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42528    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42529    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42530
42531    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42532    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42533    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42534    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42535    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42536    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42537
42538    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42539    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42540
42541    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42542    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42543    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42544    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42545    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42546    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42547
42548    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42549    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42550    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42551    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42552
42553    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42554    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42555
42556    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42557    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42558    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42559    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42560    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42561    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42562
42563    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42564    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42565    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42566    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42567
42568    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42569    fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
42570    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42571    fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42572    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42573    fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42574
42575    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42576    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42577
42578    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42579    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42580    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42581    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42582    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42583    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42584
42585    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42586    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42587    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42588    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42589    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42590    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42591
42592    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42593    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42594    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42595    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42596    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42597    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42598
42599    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42600    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42601    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42602    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42603    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42604    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42605
42606    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42607    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42608    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42609    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42610    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42611    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42612
42613    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42614    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42615    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42616    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42617    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42618    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42619    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42620    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42621    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42622    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42623
42624    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42625    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42626    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42627    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42628    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42629    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42630
42631    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42632    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42633    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42634    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42635    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42636    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42637
42638    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42639    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42640    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42641    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42642    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42643    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42644
42645    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42646    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42647    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42648    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42649    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42650    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42651
42652    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42653    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42654    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42655    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42656    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42657    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42658
42659    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42660    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42661    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42662    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42663    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42664    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42665
42666    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42667    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42668    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42669    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42670    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42671    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42672
42673    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42674    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42675    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42676    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42677    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42678    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42679
42680    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42681    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42682    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42683    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42684    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42685    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42686
42687    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42688    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42689    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42690    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42691    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42692    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42693
42694    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42695    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42696    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42697    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42698    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42699    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42700
42701    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42702    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42703    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42704    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42705    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42706    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42707
42708    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42709    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42710    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42711    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42712    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42713    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42714
42715    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42716    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42717    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42718    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42719    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42720    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42721
42722    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42723    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42724    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42725    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42726    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42727    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42728
42729    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42730    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42731
42732    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42733    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42734    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42735    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42736    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42737    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42738
42739    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42740    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42741    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42742    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42743    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42744    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42745
42746    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42747    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42748    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42749    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42750    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42751    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42752
42753    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42754    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42755    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42756    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42757    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42758    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42759
42760    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42761    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42762    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42763    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42764    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42765    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42766
42767    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42768    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42769    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42770    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42771    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42772    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42773
42774    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42775    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42776    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42777    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42778    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42779    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42780
42781    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42782    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42783    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42784    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42785    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42786    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42787
42788    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42789    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42790    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42791    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42792    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42793    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42794
42795    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42796    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42797    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42798    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42799    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42800    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42801
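    // Masked gathers: each selected lane loads from `slice + offset * scale`,
    // where `scale` must be 1, 2, 4, or 8; lanes whose mask bit is clear keep
    // the corresponding element of `src` and perform no load.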
42802    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42803    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42804    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42805    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42806    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42807    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42808    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42809    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42810    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42811    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42812    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42813    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42814    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42815    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42816    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42817    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42818
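    // Masked scatters: the inverse of the gathers above. Only lanes with a
    // set mask bit are stored; when two indices overlap, writes happen in
    // lane order, so the highest selected lane wins.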
42819    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42820    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42821    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42822    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42823    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42824    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42825    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42826    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42827    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42828    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42829
42830    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42831    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42832    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42833    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42834    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42835    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42836
42837    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42838    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42839    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42840    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42841    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42842    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42843    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42844    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42845    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42846    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42847    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42848    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42849    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42850    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42851    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42852    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42853
42854    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42855    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42856    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42857    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42858    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42859    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42860    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42861    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42862    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42863    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42864    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42865    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42866    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42867    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42868    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42869    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42870
42871    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42872    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42873    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42874    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42875    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42876    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42877    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42878    fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42879    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42880    fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42881    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42882    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42883    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42884    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42885    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42886    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42887
42888    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42889    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42890    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42891    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42892    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42893    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42894    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42895    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42896    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42897    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42898    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42899    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42900    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42901    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42902    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42903    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42904
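    // Float-compare bindings: `op` is a _CMP_* predicate (0..=31), the
    // incoming mask `m` is ANDed into the result, and the 512-bit and scalar
    // forms additionally take a suppress-all-exceptions (`sae`) flag that the
    // 128/256-bit forms lack.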
42905    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42906    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42907    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42908    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42909
42910    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42911    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42912    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42913    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42914    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42915    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42916
42917    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42918    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42919    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42920    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42921    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42922    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42923
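    // Shift-by-count bindings: every lane is shifted by the unsigned amount
    // in the low 64 bits of `count`. Counts of the element width or more
    // yield zero for the logical shifts and a sign fill for the arithmetic
    // ones.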
42924    #[link_name = "llvm.x86.avx512.psll.d.512"]
42925    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42926    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42927    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42928    #[link_name = "llvm.x86.avx512.psll.q.512"]
42929    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42930    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42931    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42932
42933    #[link_name = "llvm.x86.avx512.psra.d.512"]
42934    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42935
42936    #[link_name = "llvm.x86.avx512.psra.q.512"]
42937    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42938    #[link_name = "llvm.x86.avx512.psra.q.256"]
42939    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42940    #[link_name = "llvm.x86.avx512.psra.q.128"]
42941    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42942
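    // Permute bindings: `vpermilvar` permutes within each 128-bit lane,
    // `permvar` permutes across the whole vector, and `vpermi2var` selects
    // from the concatenation of two source vectors using `idx`.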
42943    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
42944    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
42945    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
42946    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
42947
42948    #[link_name = "llvm.x86.avx512.permvar.si.512"]
42949    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
42950
42951    #[link_name = "llvm.x86.avx512.permvar.di.512"]
42952    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
42953    #[link_name = "llvm.x86.avx512.permvar.di.256"]
42954    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
42955
42956    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
42957    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
42958
42959    #[link_name = "llvm.x86.avx512.permvar.df.512"]
42960    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
42961    #[link_name = "llvm.x86.avx512.permvar.df.256"]
42962    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
42963
42964    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
42965    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
42966    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
42967    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
42968    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
42969    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
42970
42971    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
42972    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
42973    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
42974    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
42975    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
42976    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
42977
42978    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
42979    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
42980    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
42981    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
42982    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
42983    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
42984
42985    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
42986    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
42987    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
42988    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
42989    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
42990    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
42991
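    // Compress packs the lanes selected by `mask` contiguously into the low
    // end of the result; expand (further below) performs the inverse,
    // distributing consecutive low lanes to the selected positions. In both
    // cases the unselected result lanes are taken from `src`.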
42992    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
42993    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
42994    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
42995    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
42996    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
42997    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
42998
42999    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
43000    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43001    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
43002    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43003    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
43004    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43005
43006    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
43007    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43008    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
43009    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43010    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
43011    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43012
43013    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
43014    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43015    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
43016    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43017    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
43018    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43019
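    // Compress-store is the memory form of compress: the selected lanes are
    // written contiguously (and possibly unaligned) starting at `mem`, so
    // exactly `mask.count_ones()` elements end up stored.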
43020    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
43021    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43022    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43023    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43024    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43025    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43026
43027    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43028    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43029    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43030    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43031    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43032    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43033
43034    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43035    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43036    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43037    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43038    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43039    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43040
43041    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43042    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43043    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43044    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43045    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43046    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43047
43048    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43049    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43050    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43051    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43052    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43053    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43054
43055    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43056    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43057    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43058    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43059    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43060    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43061
43062    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43063    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43064    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43065    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43066    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43067    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43068
43069    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43070    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43071    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43072    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43073    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43074    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43075
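    // Scalar arithmetic with rounding control: these operate on lane 0 and
    // copy the upper lanes from `a`. `rounding` must be
    // _MM_FROUND_CUR_DIRECTION or one of the _MM_FROUND_TO_* modes ORed with
    // _MM_FROUND_NO_EXC; the max/min/getexp/getmant forms take a bare `sae`
    // flag instead.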
43076    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43077    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43078    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43079    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43080    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43081    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43082    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43083    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43084    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43085    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43086    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43087    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43088    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43089    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43090    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43091    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43092    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43093    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43094    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43095    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43096    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43097    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43098    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43099    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43100    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43101    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43102    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43103    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43104    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43105    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43106    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43107    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43108    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43109    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43110    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43111    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43112
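    // The rsqrt14/rcp14 bindings return reciprocal square-root and
    // reciprocal approximations with a maximum relative error of 2^-14.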
43113    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43114    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43115    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43116    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43117    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43118    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43119    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43120    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43121
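    // vrndscale rounds to the number of fraction bits encoded in the upper
    // nibble of `imm8` (the lower nibble picks the rounding mode), and
    // vscalef computes `a * 2^floor(b)`.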
43122    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43123    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43124    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43125    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43126    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43127    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43128    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43129    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43130
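    // Scalar fused multiply-add on bare f32/f64 values with an explicit
    // rounding mode; the `_mm_fmadd_round_ss`-style wrappers extract lane 0,
    // call these, and reinsert the result.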
43131    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43132    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43133    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43134    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43135
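    // fixupimm replaces special-case inputs (NaN, ±0, ±Inf, ...) according
    // to the per-class response table passed in `c`; `imm8` selects which
    // classes additionally raise exceptions.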
43136    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43137    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43138    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43139    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43140    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43141    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43142    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43143    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43144
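    // Scalar conversions. The cvtss2sd/cvtsd2ss pair converts lane 0 between
    // f32 and f64 under mask; the integer conversions round according to
    // their i32 control, except the cvtt* (truncating) variants, where that
    // argument only carries the SAE flag.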
43145    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43146    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43147    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43148    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43149
43150    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43151    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43152    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43153    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43154
43155    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43156    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43157    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43158    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43159
43160    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43161    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43162
43163    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43164    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43165
43166    #[link_name = "llvm.x86.avx512.cvttss2si"]
43167    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43168    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43169    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43170
43171    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43172    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43173    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43174    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43175
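    // vcomi compares lane 0 of `a` and `b` under the _CMP_* predicate in
    // `imm8` and returns 0 or 1, with `sae` controlling exception
    // suppression.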
43176    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43177    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43178    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43179    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43180
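    // Expand-loads are the memory form of expand: `mask.count_ones()`
    // consecutive elements are read starting at `mem_addr` and spread to the
    // lanes whose mask bit is set; other lanes come from `a`, and masked-off
    // elements are never touched in memory, so they cannot fault.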
43181    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43182    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43183    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43184    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43185    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43186    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43187    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43188    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43189    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43190    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43191    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43192    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43193    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43194    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43195    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43196    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43197    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43198    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43199    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43200    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43201    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43202    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43203    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43204    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43205
43206}
43207
43208#[cfg(test)]
43209mod tests {
43210
43211    use stdarch_test::simd_test;
43212
43213    use crate::core_arch::x86::*;
43214    use crate::hint::black_box;
43215    use crate::mem;
43216
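    // Mask convention used throughout these tests: bit i of a mask selects
    // element i. The `setr` constructors list elements from index 0 upward
    // and the `set` constructors from the highest index down, so
    // 0b00000000_11111111 covers the first eight `setr` arguments but the
    // last eight `set` arguments. Note also that the integer abs tested
    // below is wrapping: abs(i32::MIN) stays i32::MIN, which the expected
    // vectors spell as i32::MAX.wrapping_add(1).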
43217    #[simd_test(enable = "avx512f")]
43218    unsafe fn test_mm512_abs_epi32() {
43219        #[rustfmt::skip]
43220        let a = _mm512_setr_epi32(
43221            0, 1, -1, i32::MAX,
43222            i32::MIN, 100, -100, -32,
43223            0, 1, -1, i32::MAX,
43224            i32::MIN, 100, -100, -32,
43225        );
43226        let r = _mm512_abs_epi32(a);
43227        #[rustfmt::skip]
43228        let e = _mm512_setr_epi32(
43229            0, 1, 1, i32::MAX,
43230            i32::MAX.wrapping_add(1), 100, 100, 32,
43231            0, 1, 1, i32::MAX,
43232            i32::MAX.wrapping_add(1), 100, 100, 32,
43233        );
43234        assert_eq_m512i(r, e);
43235    }
43236
43237    #[simd_test(enable = "avx512f")]
43238    unsafe fn test_mm512_mask_abs_epi32() {
43239        #[rustfmt::skip]
43240        let a = _mm512_setr_epi32(
43241            0, 1, -1, i32::MAX,
43242            i32::MIN, 100, -100, -32,
43243            0, 1, -1, i32::MAX,
43244            i32::MIN, 100, -100, -32,
43245        );
43246        let r = _mm512_mask_abs_epi32(a, 0, a);
43247        assert_eq_m512i(r, a);
43248        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43249        #[rustfmt::skip]
43250        let e = _mm512_setr_epi32(
43251            0, 1, 1, i32::MAX,
43252            i32::MAX.wrapping_add(1), 100, 100, 32,
43253            0, 1, -1, i32::MAX,
43254            i32::MIN, 100, -100, -32,
43255        );
43256        assert_eq_m512i(r, e);
43257    }
43258
43259    #[simd_test(enable = "avx512f")]
43260    unsafe fn test_mm512_maskz_abs_epi32() {
43261        #[rustfmt::skip]
43262        let a = _mm512_setr_epi32(
43263            0, 1, -1, i32::MAX,
43264            i32::MIN, 100, -100, -32,
43265            0, 1, -1, i32::MAX,
43266            i32::MIN, 100, -100, -32,
43267        );
43268        let r = _mm512_maskz_abs_epi32(0, a);
43269        assert_eq_m512i(r, _mm512_setzero_si512());
43270        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43271        #[rustfmt::skip]
43272        let e = _mm512_setr_epi32(
43273            0, 1, 1, i32::MAX,
43274            i32::MAX.wrapping_add(1), 100, 100, 32,
43275            0, 0, 0, 0,
43276            0, 0, 0, 0,
43277        );
43278        assert_eq_m512i(r, e);
43279    }
43280
43281    #[simd_test(enable = "avx512f,avx512vl")]
43282    unsafe fn test_mm256_mask_abs_epi32() {
43283        #[rustfmt::skip]
43284        let a = _mm256_setr_epi32(
43285            0, 1, -1, i32::MAX,
43286            i32::MIN, 100, -100, -32,
43287        );
43288        let r = _mm256_mask_abs_epi32(a, 0, a);
43289        assert_eq_m256i(r, a);
43290        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43291        #[rustfmt::skip]
43292        let e = _mm256_setr_epi32(
43293            0, 1, 1, i32::MAX,
43294            i32::MAX.wrapping_add(1), 100, -100, -32,
43295        );
43296        assert_eq_m256i(r, e);
43297    }
43298
43299    #[simd_test(enable = "avx512f,avx512vl")]
43300    unsafe fn test_mm256_maskz_abs_epi32() {
43301        #[rustfmt::skip]
43302        let a = _mm256_setr_epi32(
43303            0, 1, -1, i32::MAX,
43304            i32::MIN, 100, -100, -32,
43305        );
43306        let r = _mm256_maskz_abs_epi32(0, a);
43307        assert_eq_m256i(r, _mm256_setzero_si256());
43308        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43309        #[rustfmt::skip]
43310        let e = _mm256_setr_epi32(
43311            0, 1, 1, i32::MAX,
43312            0, 0, 0, 0,
43313        );
43314        assert_eq_m256i(r, e);
43315    }
43316
43317    #[simd_test(enable = "avx512f,avx512vl")]
43318    unsafe fn test_mm_mask_abs_epi32() {
43319        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43320        let r = _mm_mask_abs_epi32(a, 0, a);
43321        assert_eq_m128i(r, a);
43322        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43323        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43324        assert_eq_m128i(r, e);
43325    }
43326
43327    #[simd_test(enable = "avx512f,avx512vl")]
43328    unsafe fn test_mm_maskz_abs_epi32() {
43329        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43330        let r = _mm_maskz_abs_epi32(0, a);
43331        assert_eq_m128i(r, _mm_setzero_si128());
43332        let r = _mm_maskz_abs_epi32(0b00001111, a);
43333        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43334        assert_eq_m128i(r, e);
43335    }
43336
43337    #[simd_test(enable = "avx512f")]
43338    unsafe fn test_mm512_abs_ps() {
43339        #[rustfmt::skip]
43340        let a = _mm512_setr_ps(
43341            0., 1., -1., f32::MAX,
43342            f32::MIN, 100., -100., -32.,
43343            0., 1., -1., f32::MAX,
43344            f32::MIN, 100., -100., -32.,
43345        );
43346        let r = _mm512_abs_ps(a);
43347        #[rustfmt::skip]
43348        let e = _mm512_setr_ps(
43349            0., 1., 1., f32::MAX,
43350            f32::MAX, 100., 100., 32.,
43351            0., 1., 1., f32::MAX,
43352            f32::MAX, 100., 100., 32.,
43353        );
43354        assert_eq_m512(r, e);
43355    }
43356
43357    #[simd_test(enable = "avx512f")]
43358    unsafe fn test_mm512_mask_abs_ps() {
43359        #[rustfmt::skip]
43360        let a = _mm512_setr_ps(
43361            0., 1., -1., f32::MAX,
43362            f32::MIN, 100., -100., -32.,
43363            0., 1., -1., f32::MAX,
43364            f32::MIN, 100., -100., -32.,
43365        );
43366        let r = _mm512_mask_abs_ps(a, 0, a);
43367        assert_eq_m512(r, a);
43368        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43369        #[rustfmt::skip]
43370        let e = _mm512_setr_ps(
43371            0., 1., 1., f32::MAX,
43372            f32::MAX, 100., 100., 32.,
43373            0., 1., -1., f32::MAX,
43374            f32::MIN, 100., -100., -32.,
43375        );
43376        assert_eq_m512(r, e);
43377    }
43378
43379    #[simd_test(enable = "avx512f")]
43380    unsafe fn test_mm512_mask_mov_epi32() {
43381        let src = _mm512_set1_epi32(1);
43382        let a = _mm512_set1_epi32(2);
43383        let r = _mm512_mask_mov_epi32(src, 0, a);
43384        assert_eq_m512i(r, src);
43385        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43386        assert_eq_m512i(r, a);
43387    }
43388
43389    #[simd_test(enable = "avx512f")]
43390    unsafe fn test_mm512_maskz_mov_epi32() {
43391        let a = _mm512_set1_epi32(2);
43392        let r = _mm512_maskz_mov_epi32(0, a);
43393        assert_eq_m512i(r, _mm512_setzero_si512());
43394        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43395        assert_eq_m512i(r, a);
43396    }
43397
43398    #[simd_test(enable = "avx512f,avx512vl")]
43399    unsafe fn test_mm256_mask_mov_epi32() {
43400        let src = _mm256_set1_epi32(1);
43401        let a = _mm256_set1_epi32(2);
43402        let r = _mm256_mask_mov_epi32(src, 0, a);
43403        assert_eq_m256i(r, src);
43404        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43405        assert_eq_m256i(r, a);
43406    }
43407
43408    #[simd_test(enable = "avx512f,avx512vl")]
43409    unsafe fn test_mm256_maskz_mov_epi32() {
43410        let a = _mm256_set1_epi32(2);
43411        let r = _mm256_maskz_mov_epi32(0, a);
43412        assert_eq_m256i(r, _mm256_setzero_si256());
43413        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43414        assert_eq_m256i(r, a);
43415    }
43416
43417    #[simd_test(enable = "avx512f,avx512vl")]
43418    unsafe fn test_mm_mask_mov_epi32() {
43419        let src = _mm_set1_epi32(1);
43420        let a = _mm_set1_epi32(2);
43421        let r = _mm_mask_mov_epi32(src, 0, a);
43422        assert_eq_m128i(r, src);
43423        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43424        assert_eq_m128i(r, a);
43425    }
43426
43427    #[simd_test(enable = "avx512f,avx512vl")]
43428    unsafe fn test_mm_maskz_mov_epi32() {
43429        let a = _mm_set1_epi32(2);
43430        let r = _mm_maskz_mov_epi32(0, a);
43431        assert_eq_m128i(r, _mm_setzero_si128());
43432        let r = _mm_maskz_mov_epi32(0b00001111, a);
43433        assert_eq_m128i(r, a);
43434    }
43435
43436    #[simd_test(enable = "avx512f")]
43437    unsafe fn test_mm512_mask_mov_ps() {
43438        let src = _mm512_set1_ps(1.);
43439        let a = _mm512_set1_ps(2.);
43440        let r = _mm512_mask_mov_ps(src, 0, a);
43441        assert_eq_m512(r, src);
43442        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43443        assert_eq_m512(r, a);
43444    }
43445
43446    #[simd_test(enable = "avx512f")]
43447    unsafe fn test_mm512_maskz_mov_ps() {
43448        let a = _mm512_set1_ps(2.);
43449        let r = _mm512_maskz_mov_ps(0, a);
43450        assert_eq_m512(r, _mm512_setzero_ps());
43451        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43452        assert_eq_m512(r, a);
43453    }
43454
43455    #[simd_test(enable = "avx512f,avx512vl")]
43456    unsafe fn test_mm256_mask_mov_ps() {
43457        let src = _mm256_set1_ps(1.);
43458        let a = _mm256_set1_ps(2.);
43459        let r = _mm256_mask_mov_ps(src, 0, a);
43460        assert_eq_m256(r, src);
43461        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43462        assert_eq_m256(r, a);
43463    }
43464
43465    #[simd_test(enable = "avx512f,avx512vl")]
43466    unsafe fn test_mm256_maskz_mov_ps() {
43467        let a = _mm256_set1_ps(2.);
43468        let r = _mm256_maskz_mov_ps(0, a);
43469        assert_eq_m256(r, _mm256_setzero_ps());
43470        let r = _mm256_maskz_mov_ps(0b11111111, a);
43471        assert_eq_m256(r, a);
43472    }
43473
43474    #[simd_test(enable = "avx512f,avx512vl")]
43475    unsafe fn test_mm_mask_mov_ps() {
43476        let src = _mm_set1_ps(1.);
43477        let a = _mm_set1_ps(2.);
43478        let r = _mm_mask_mov_ps(src, 0, a);
43479        assert_eq_m128(r, src);
43480        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43481        assert_eq_m128(r, a);
43482    }
43483
43484    #[simd_test(enable = "avx512f,avx512vl")]
43485    unsafe fn test_mm_maskz_mov_ps() {
43486        let a = _mm_set1_ps(2.);
43487        let r = _mm_maskz_mov_ps(0, a);
43488        assert_eq_m128(r, _mm_setzero_ps());
43489        let r = _mm_maskz_mov_ps(0b00001111, a);
43490        assert_eq_m128(r, a);
43491    }
43492
43493    #[simd_test(enable = "avx512f")]
43494    unsafe fn test_mm512_add_epi32() {
43495        #[rustfmt::skip]
43496        let a = _mm512_setr_epi32(
43497            0, 1, -1, i32::MAX,
43498            i32::MIN, 100, -100, -32,
43499            0, 1, -1, i32::MAX,
43500            i32::MIN, 100, -100, -32,
43501        );
43502        let b = _mm512_set1_epi32(1);
43503        let r = _mm512_add_epi32(a, b);
43504        #[rustfmt::skip]
43505        let e = _mm512_setr_epi32(
43506            1, 2, 0, i32::MIN,
43507            i32::MIN + 1, 101, -99, -31,
43508            1, 2, 0, i32::MIN,
43509            i32::MIN + 1, 101, -99, -31,
43510        );
43511        assert_eq_m512i(r, e);
43512    }
43513
43514    #[simd_test(enable = "avx512f")]
43515    unsafe fn test_mm512_mask_add_epi32() {
43516        #[rustfmt::skip]
43517        let a = _mm512_setr_epi32(
43518            0, 1, -1, i32::MAX,
43519            i32::MIN, 100, -100, -32,
43520            0, 1, -1, i32::MAX,
43521            i32::MIN, 100, -100, -32,
43522        );
43523        let b = _mm512_set1_epi32(1);
43524        let r = _mm512_mask_add_epi32(a, 0, a, b);
43525        assert_eq_m512i(r, a);
43526        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43527        #[rustfmt::skip]
43528        let e = _mm512_setr_epi32(
43529            1, 2, 0, i32::MIN,
43530            i32::MIN + 1, 101, -99, -31,
43531            0, 1, -1, i32::MAX,
43532            i32::MIN, 100, -100, -32,
43533        );
43534        assert_eq_m512i(r, e);
43535    }
43536
43537    #[simd_test(enable = "avx512f")]
43538    unsafe fn test_mm512_maskz_add_epi32() {
43539        #[rustfmt::skip]
43540        let a = _mm512_setr_epi32(
43541            0, 1, -1, i32::MAX,
43542            i32::MIN, 100, -100, -32,
43543            0, 1, -1, i32::MAX,
43544            i32::MIN, 100, -100, -32,
43545        );
43546        let b = _mm512_set1_epi32(1);
43547        let r = _mm512_maskz_add_epi32(0, a, b);
43548        assert_eq_m512i(r, _mm512_setzero_si512());
43549        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43550        #[rustfmt::skip]
43551        let e = _mm512_setr_epi32(
43552            1, 2, 0, i32::MIN,
43553            i32::MIN + 1, 101, -99, -31,
43554            0, 0, 0, 0,
43555            0, 0, 0, 0,
43556        );
43557        assert_eq_m512i(r, e);
43558    }
43559
43560    #[simd_test(enable = "avx512f,avx512vl")]
43561    unsafe fn test_mm256_mask_add_epi32() {
43562        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43563        let b = _mm256_set1_epi32(1);
43564        let r = _mm256_mask_add_epi32(a, 0, a, b);
43565        assert_eq_m256i(r, a);
43566        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43567        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43568        assert_eq_m256i(r, e);
43569    }
43570
43571    #[simd_test(enable = "avx512f,avx512vl")]
43572    unsafe fn test_mm256_maskz_add_epi32() {
43573        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43574        let b = _mm256_set1_epi32(1);
43575        let r = _mm256_maskz_add_epi32(0, a, b);
43576        assert_eq_m256i(r, _mm256_setzero_si256());
43577        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43578        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43579        assert_eq_m256i(r, e);
43580    }
43581
43582    #[simd_test(enable = "avx512f,avx512vl")]
43583    unsafe fn test_mm_mask_add_epi32() {
43584        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43585        let b = _mm_set1_epi32(1);
43586        let r = _mm_mask_add_epi32(a, 0, a, b);
43587        assert_eq_m128i(r, a);
43588        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43589        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43590        assert_eq_m128i(r, e);
43591    }
43592
43593    #[simd_test(enable = "avx512f,avx512vl")]
43594    unsafe fn test_mm_maskz_add_epi32() {
43595        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43596        let b = _mm_set1_epi32(1);
43597        let r = _mm_maskz_add_epi32(0, a, b);
43598        assert_eq_m128i(r, _mm_setzero_si128());
43599        let r = _mm_maskz_add_epi32(0b00001111, a, b);
43600        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43601        assert_eq_m128i(r, e);
43602    }
43603
43604    #[simd_test(enable = "avx512f")]
43605    unsafe fn test_mm512_add_ps() {
43606        #[rustfmt::skip]
43607        let a = _mm512_setr_ps(
43608            0., 1., -1., f32::MAX,
43609            f32::MIN, 100., -100., -32.,
43610            0., 1., -1., f32::MAX,
43611            f32::MIN, 100., -100., -32.,
43612        );
43613        let b = _mm512_set1_ps(1.);
43614        let r = _mm512_add_ps(a, b);
43615        #[rustfmt::skip]
43616        let e = _mm512_setr_ps(
43617            1., 2., 0., f32::MAX,
43618            f32::MIN + 1., 101., -99., -31.,
43619            1., 2., 0., f32::MAX,
43620            f32::MIN + 1., 101., -99., -31.,
43621        );
43622        assert_eq_m512(r, e);
43623    }
43624
43625    #[simd_test(enable = "avx512f")]
43626    unsafe fn test_mm512_mask_add_ps() {
43627        #[rustfmt::skip]
43628        let a = _mm512_setr_ps(
43629            0., 1., -1., f32::MAX,
43630            f32::MIN, 100., -100., -32.,
43631            0., 1., -1., f32::MAX,
43632            f32::MIN, 100., -100., -32.,
43633        );
43634        let b = _mm512_set1_ps(1.);
43635        let r = _mm512_mask_add_ps(a, 0, a, b);
43636        assert_eq_m512(r, a);
43637        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43638        #[rustfmt::skip]
43639        let e = _mm512_setr_ps(
43640            1., 2., 0., f32::MAX,
43641            f32::MIN + 1., 101., -99., -31.,
43642            0., 1., -1., f32::MAX,
43643            f32::MIN, 100., -100., -32.,
43644        );
43645        assert_eq_m512(r, e);
43646    }
43647
43648    #[simd_test(enable = "avx512f")]
43649    unsafe fn test_mm512_maskz_add_ps() {
43650        #[rustfmt::skip]
43651        let a = _mm512_setr_ps(
43652            0., 1., -1., f32::MAX,
43653            f32::MIN, 100., -100., -32.,
43654            0., 1., -1., f32::MAX,
43655            f32::MIN, 100., -100., -32.,
43656        );
43657        let b = _mm512_set1_ps(1.);
43658        let r = _mm512_maskz_add_ps(0, a, b);
43659        assert_eq_m512(r, _mm512_setzero_ps());
43660        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43661        #[rustfmt::skip]
43662        let e = _mm512_setr_ps(
43663            1., 2., 0., f32::MAX,
43664            f32::MIN + 1., 101., -99., -31.,
43665            0., 0., 0., 0.,
43666            0., 0., 0., 0.,
43667        );
43668        assert_eq_m512(r, e);
43669    }
43670
43671    #[simd_test(enable = "avx512f,avx512vl")]
43672    unsafe fn test_mm256_mask_add_ps() {
43673        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43674        let b = _mm256_set1_ps(1.);
43675        let r = _mm256_mask_add_ps(a, 0, a, b);
43676        assert_eq_m256(r, a);
43677        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43678        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43679        assert_eq_m256(r, e);
43680    }
43681
43682    #[simd_test(enable = "avx512f,avx512vl")]
43683    unsafe fn test_mm256_maskz_add_ps() {
43684        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43685        let b = _mm256_set1_ps(1.);
43686        let r = _mm256_maskz_add_ps(0, a, b);
43687        assert_eq_m256(r, _mm256_setzero_ps());
43688        let r = _mm256_maskz_add_ps(0b11111111, a, b);
43689        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43690        assert_eq_m256(r, e);
43691    }
43692
43693    #[simd_test(enable = "avx512f,avx512vl")]
43694    unsafe fn test_mm_mask_add_ps() {
43695        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43696        let b = _mm_set1_ps(1.);
43697        let r = _mm_mask_add_ps(a, 0, a, b);
43698        assert_eq_m128(r, a);
43699        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43700        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43701        assert_eq_m128(r, e);
43702    }
43703
43704    #[simd_test(enable = "avx512f,avx512vl")]
43705    unsafe fn test_mm_maskz_add_ps() {
43706        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43707        let b = _mm_set1_ps(1.);
43708        let r = _mm_maskz_add_ps(0, a, b);
43709        assert_eq_m128(r, _mm_setzero_ps());
43710        let r = _mm_maskz_add_ps(0b00001111, a, b);
43711        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43712        assert_eq_m128(r, e);
43713    }
43714
43715    #[simd_test(enable = "avx512f")]
43716    unsafe fn test_mm512_sub_epi32() {
43717        #[rustfmt::skip]
43718        let a = _mm512_setr_epi32(
43719            0, 1, -1, i32::MAX,
43720            i32::MIN, 100, -100, -32,
43721            0, 1, -1, i32::MAX,
43722            i32::MIN, 100, -100, -32,
43723        );
43724        let b = _mm512_set1_epi32(1);
43725        let r = _mm512_sub_epi32(a, b);
43726        #[rustfmt::skip]
43727        let e = _mm512_setr_epi32(
43728            -1, 0, -2, i32::MAX - 1,
43729            i32::MAX, 99, -101, -33,
43730            -1, 0, -2, i32::MAX - 1,
43731            i32::MAX, 99, -101, -33,
43732        );
43733        assert_eq_m512i(r, e);
43734    }
43735
43736    #[simd_test(enable = "avx512f")]
43737    unsafe fn test_mm512_mask_sub_epi32() {
43738        #[rustfmt::skip]
43739        let a = _mm512_setr_epi32(
43740            0, 1, -1, i32::MAX,
43741            i32::MIN, 100, -100, -32,
43742            0, 1, -1, i32::MAX,
43743            i32::MIN, 100, -100, -32,
43744        );
43745        let b = _mm512_set1_epi32(1);
43746        let r = _mm512_mask_sub_epi32(a, 0, a, b);
43747        assert_eq_m512i(r, a);
43748        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43749        #[rustfmt::skip]
43750        let e = _mm512_setr_epi32(
43751            -1, 0, -2, i32::MAX - 1,
43752            i32::MAX, 99, -101, -33,
43753            0, 1, -1, i32::MAX,
43754            i32::MIN, 100, -100, -32,
43755        );
43756        assert_eq_m512i(r, e);
43757    }
43758
43759    #[simd_test(enable = "avx512f")]
43760    unsafe fn test_mm512_maskz_sub_epi32() {
43761        #[rustfmt::skip]
43762        let a = _mm512_setr_epi32(
43763            0, 1, -1, i32::MAX,
43764            i32::MIN, 100, -100, -32,
43765            0, 1, -1, i32::MAX,
43766            i32::MIN, 100, -100, -32,
43767        );
43768        let b = _mm512_set1_epi32(1);
43769        let r = _mm512_maskz_sub_epi32(0, a, b);
43770        assert_eq_m512i(r, _mm512_setzero_si512());
43771        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43772        #[rustfmt::skip]
43773        let e = _mm512_setr_epi32(
43774            -1, 0, -2, i32::MAX - 1,
43775            i32::MAX, 99, -101, -33,
43776            0, 0, 0, 0,
43777            0, 0, 0, 0,
43778        );
43779        assert_eq_m512i(r, e);
43780    }
43781
43782    #[simd_test(enable = "avx512f,avx512vl")]
43783    unsafe fn test_mm256_mask_sub_epi32() {
43784        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43785        let b = _mm256_set1_epi32(1);
43786        let r = _mm256_mask_sub_epi32(a, 0, a, b);
43787        assert_eq_m256i(r, a);
43788        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43789        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43790        assert_eq_m256i(r, e);
43791    }
43792
43793    #[simd_test(enable = "avx512f,avx512vl")]
43794    unsafe fn test_mm256_maskz_sub_epi32() {
43795        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43796        let b = _mm256_set1_epi32(1);
43797        let r = _mm256_maskz_sub_epi32(0, a, b);
43798        assert_eq_m256i(r, _mm256_setzero_si256());
43799        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43800        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43801        assert_eq_m256i(r, e);
43802    }
43803
43804    #[simd_test(enable = "avx512f,avx512vl")]
43805    unsafe fn test_mm_mask_sub_epi32() {
43806        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43807        let b = _mm_set1_epi32(1);
43808        let r = _mm_mask_sub_epi32(a, 0, a, b);
43809        assert_eq_m128i(r, a);
43810        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43811        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43812        assert_eq_m128i(r, e);
43813    }
43814
43815    #[simd_test(enable = "avx512f,avx512vl")]
43816    unsafe fn test_mm_maskz_sub_epi32() {
43817        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43818        let b = _mm_set1_epi32(1);
43819        let r = _mm_maskz_sub_epi32(0, a, b);
43820        assert_eq_m128i(r, _mm_setzero_si128());
43821        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43822        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43823        assert_eq_m128i(r, e);
43824    }
43825
43826    #[simd_test(enable = "avx512f")]
43827    unsafe fn test_mm512_sub_ps() {
43828        #[rustfmt::skip]
43829        let a = _mm512_setr_ps(
43830            0., 1., -1., f32::MAX,
43831            f32::MIN, 100., -100., -32.,
43832            0., 1., -1., f32::MAX,
43833            f32::MIN, 100., -100., -32.,
43834        );
43835        let b = _mm512_set1_ps(1.);
43836        let r = _mm512_sub_ps(a, b);
43837        #[rustfmt::skip]
43838        let e = _mm512_setr_ps(
43839            -1., 0., -2., f32::MAX - 1.,
43840            f32::MIN, 99., -101., -33.,
43841            -1., 0., -2., f32::MAX - 1.,
43842            f32::MIN, 99., -101., -33.,
43843        );
43844        assert_eq_m512(r, e);
43845    }
43846
43847    #[simd_test(enable = "avx512f")]
43848    unsafe fn test_mm512_mask_sub_ps() {
43849        #[rustfmt::skip]
43850        let a = _mm512_setr_ps(
43851            0., 1., -1., f32::MAX,
43852            f32::MIN, 100., -100., -32.,
43853            0., 1., -1., f32::MAX,
43854            f32::MIN, 100., -100., -32.,
43855        );
43856        let b = _mm512_set1_ps(1.);
43857        let r = _mm512_mask_sub_ps(a, 0, a, b);
43858        assert_eq_m512(r, a);
43859        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43860        #[rustfmt::skip]
43861        let e = _mm512_setr_ps(
43862            -1., 0., -2., f32::MAX - 1.,
43863            f32::MIN, 99., -101., -33.,
43864            0., 1., -1., f32::MAX,
43865            f32::MIN, 100., -100., -32.,
43866        );
43867        assert_eq_m512(r, e);
43868    }
43869
43870    #[simd_test(enable = "avx512f")]
43871    unsafe fn test_mm512_maskz_sub_ps() {
43872        #[rustfmt::skip]
43873        let a = _mm512_setr_ps(
43874            0., 1., -1., f32::MAX,
43875            f32::MIN, 100., -100., -32.,
43876            0., 1., -1., f32::MAX,
43877            f32::MIN, 100., -100., -32.,
43878        );
43879        let b = _mm512_set1_ps(1.);
43880        let r = _mm512_maskz_sub_ps(0, a, b);
43881        assert_eq_m512(r, _mm512_setzero_ps());
43882        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43883        #[rustfmt::skip]
43884        let e = _mm512_setr_ps(
43885            -1., 0., -2., f32::MAX - 1.,
43886            f32::MIN, 99., -101., -33.,
43887            0., 0., 0., 0.,
43888            0., 0., 0., 0.,
43889        );
43890        assert_eq_m512(r, e);
43891    }
43892
43893    #[simd_test(enable = "avx512f,avx512vl")]
43894    unsafe fn test_mm256_mask_sub_ps() {
43895        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43896        let b = _mm256_set1_ps(1.);
43897        let r = _mm256_mask_sub_ps(a, 0, a, b);
43898        assert_eq_m256(r, a);
43899        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43900        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43901        assert_eq_m256(r, e);
43902    }
43903
43904    #[simd_test(enable = "avx512f,avx512vl")]
43905    unsafe fn test_mm256_maskz_sub_ps() {
43906        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43907        let b = _mm256_set1_ps(1.);
43908        let r = _mm256_maskz_sub_ps(0, a, b);
43909        assert_eq_m256(r, _mm256_setzero_ps());
43910        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
43911        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43912        assert_eq_m256(r, e);
43913    }
43914
43915    #[simd_test(enable = "avx512f,avx512vl")]
43916    unsafe fn test_mm_mask_sub_ps() {
43917        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43918        let b = _mm_set1_ps(1.);
43919        let r = _mm_mask_sub_ps(a, 0, a, b);
43920        assert_eq_m128(r, a);
43921        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
43922        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
43923        assert_eq_m128(r, e);
43924    }
43925
43926    #[simd_test(enable = "avx512f,avx512vl")]
43927    unsafe fn test_mm_maskz_sub_ps() {
43928        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43929        let b = _mm_set1_ps(1.);
43930        let r = _mm_maskz_sub_ps(0, a, b);
43931        assert_eq_m128(r, _mm_setzero_ps());
43932        let r = _mm_maskz_sub_ps(0b00001111, a, b);
43933        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
43934        assert_eq_m128(r, e);
43935    }
43936
43937    #[simd_test(enable = "avx512f")]
43938    unsafe fn test_mm512_mullo_epi32() {
43939        #[rustfmt::skip]
43940        let a = _mm512_setr_epi32(
43941            0, 1, -1, i32::MAX,
43942            i32::MIN, 100, -100, -32,
43943            0, 1, -1, i32::MAX,
43944            i32::MIN, 100, -100, -32,
43945        );
43946        let b = _mm512_set1_epi32(2);
43947        let r = _mm512_mullo_epi32(a, b);
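        // mullo keeps only the low 32 bits of each product: i32::MAX * 2 wraps to -2
        // and i32::MIN * 2 wraps to 0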
43948        let e = _mm512_setr_epi32(
43949            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
43950        );
43951        assert_eq_m512i(r, e);
43952    }
43953
43954    #[simd_test(enable = "avx512f")]
43955    unsafe fn test_mm512_mask_mullo_epi32() {
43956        #[rustfmt::skip]
43957        let a = _mm512_setr_epi32(
43958            0, 1, -1, i32::MAX,
43959            i32::MIN, 100, -100, -32,
43960            0, 1, -1, i32::MAX,
43961            i32::MIN, 100, -100, -32,
43962        );
43963        let b = _mm512_set1_epi32(2);
43964        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
43965        assert_eq_m512i(r, a);
43966        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
43967        #[rustfmt::skip]
43968        let e = _mm512_setr_epi32(
43969            0, 2, -2, -2,
43970            0, 200, -200, -64,
43971            0, 1, -1, i32::MAX,
43972            i32::MIN, 100, -100, -32,
43973        );
43974        assert_eq_m512i(r, e);
43975    }
43976
43977    #[simd_test(enable = "avx512f")]
43978    unsafe fn test_mm512_maskz_mullo_epi32() {
43979        #[rustfmt::skip]
43980        let a = _mm512_setr_epi32(
43981            0, 1, -1, i32::MAX,
43982            i32::MIN, 100, -100, -32,
43983            0, 1, -1, i32::MAX,
43984            i32::MIN, 100, -100, -32,
43985        );
43986        let b = _mm512_set1_epi32(2);
43987        let r = _mm512_maskz_mullo_epi32(0, a, b);
43988        assert_eq_m512i(r, _mm512_setzero_si512());
43989        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
43990        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
43991        assert_eq_m512i(r, e);
43992    }
43993
43994    #[simd_test(enable = "avx512f,avx512vl")]
43995    unsafe fn test_mm256_mask_mullo_epi32() {
43996        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43997        let b = _mm256_set1_epi32(2);
43998        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
43999        assert_eq_m256i(r, a);
44000        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44001        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44002        assert_eq_m256i(r, e);
44003    }
44004
44005    #[simd_test(enable = "avx512f,avx512vl")]
44006    unsafe fn test_mm256_maskz_mullo_epi32() {
44007        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44008        let b = _mm256_set1_epi32(2);
44009        let r = _mm256_maskz_mullo_epi32(0, a, b);
44010        assert_eq_m256i(r, _mm256_setzero_si256());
44011        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44012        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44013        assert_eq_m256i(r, e);
44014    }
44015
44016    #[simd_test(enable = "avx512f,avx512vl")]
44017    unsafe fn test_mm_mask_mullo_epi32() {
44018        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44019        let b = _mm_set1_epi32(2);
44020        let r = _mm_mask_mullo_epi32(a, 0, a, b);
44021        assert_eq_m128i(r, a);
44022        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44023        let e = _mm_set_epi32(2, -2, -2, 0);
44024        assert_eq_m128i(r, e);
44025    }
44026
44027    #[simd_test(enable = "avx512f,avx512vl")]
44028    unsafe fn test_mm_maskz_mullo_epi32() {
44029        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44030        let b = _mm_set1_epi32(2);
44031        let r = _mm_maskz_mullo_epi32(0, a, b);
44032        assert_eq_m128i(r, _mm_setzero_si128());
44033        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44034        let e = _mm_set_epi32(2, -2, -2, 0);
44035        assert_eq_m128i(r, e);
44036    }
44037
44038    #[simd_test(enable = "avx512f")]
44039    unsafe fn test_mm512_mul_ps() {
44040        #[rustfmt::skip]
44041        let a = _mm512_setr_ps(
44042            0., 1., -1., f32::MAX,
44043            f32::MIN, 100., -100., -32.,
44044            0., 1., -1., f32::MAX,
44045            f32::MIN, 100., -100., -32.,
44046        );
44047        let b = _mm512_set1_ps(2.);
44048        let r = _mm512_mul_ps(a, b);
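        // f32::MAX * 2. overflows to INFINITY and f32::MIN * 2. to NEG_INFINITY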
44049        #[rustfmt::skip]
44050        let e = _mm512_setr_ps(
44051            0., 2., -2., f32::INFINITY,
44052            f32::NEG_INFINITY, 200., -200., -64.,
44053            0., 2., -2., f32::INFINITY,
44054            f32::NEG_INFINITY, 200., -200., -64.,
44056        );
44057        assert_eq_m512(r, e);
44058    }
44059
44060    #[simd_test(enable = "avx512f")]
44061    unsafe fn test_mm512_mask_mul_ps() {
44062        #[rustfmt::skip]
44063        let a = _mm512_setr_ps(
44064            0., 1., -1., f32::MAX,
44065            f32::MIN, 100., -100., -32.,
44066            0., 1., -1., f32::MAX,
44067            f32::MIN, 100., -100., -32.,
44068        );
44069        let b = _mm512_set1_ps(2.);
44070        let r = _mm512_mask_mul_ps(a, 0, a, b);
44071        assert_eq_m512(r, a);
44072        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44073        #[rustfmt::skip]
44074        let e = _mm512_setr_ps(
44075            0., 2., -2., f32::INFINITY,
44076            f32::NEG_INFINITY, 200., -200., -64.,
44077            0., 1., -1., f32::MAX,
44078            f32::MIN, 100., -100., -32.,
44079        );
44080        assert_eq_m512(r, e);
44081    }
44082
44083    #[simd_test(enable = "avx512f")]
44084    unsafe fn test_mm512_maskz_mul_ps() {
44085        #[rustfmt::skip]
44086        let a = _mm512_setr_ps(
44087            0., 1., -1., f32::MAX,
44088            f32::MIN, 100., -100., -32.,
44089            0., 1., -1., f32::MAX,
44090            f32::MIN, 100., -100., -32.,
44091        );
44092        let b = _mm512_set1_ps(2.);
44093        let r = _mm512_maskz_mul_ps(0, a, b);
44094        assert_eq_m512(r, _mm512_setzero_ps());
44095        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44096        #[rustfmt::skip]
44097        let e = _mm512_setr_ps(
44098            0., 2., -2., f32::INFINITY,
44099            f32::NEG_INFINITY, 200., -200., -64.,
44100            0., 0., 0., 0.,
44101            0., 0., 0., 0.,
44102        );
44103        assert_eq_m512(r, e);
44104    }
44105
44106    #[simd_test(enable = "avx512f,avx512vl")]
44107    unsafe fn test_mm256_mask_mul_ps() {
44108        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44109        let b = _mm256_set1_ps(2.);
44110        let r = _mm256_mask_mul_ps(a, 0, a, b);
44111        assert_eq_m256(r, a);
44112        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44113        #[rustfmt::skip]
44114        let e = _mm256_set_ps(
44115            0., 2., -2., f32::INFINITY,
44116            f32::NEG_INFINITY, 200., -200., -64.,
44117        );
44118        assert_eq_m256(r, e);
44119    }
44120
44121    #[simd_test(enable = "avx512f,avx512vl")]
44122    unsafe fn test_mm256_maskz_mul_ps() {
44123        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44124        let b = _mm256_set1_ps(2.);
44125        let r = _mm256_maskz_mul_ps(0, a, b);
44126        assert_eq_m256(r, _mm256_setzero_ps());
44127        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44128        #[rustfmt::skip]
44129        let e = _mm256_set_ps(
44130            0., 2., -2., f32::INFINITY,
44131            f32::NEG_INFINITY, 200., -200., -64.,
44132        );
44133        assert_eq_m256(r, e);
44134    }
44135
44136    #[simd_test(enable = "avx512f,avx512vl")]
44137    unsafe fn test_mm_mask_mul_ps() {
44138        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44139        let b = _mm_set1_ps(2.);
44140        let r = _mm_mask_mul_ps(a, 0, a, b);
44141        assert_eq_m128(r, a);
44142        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44143        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44144        assert_eq_m128(r, e);
44145    }
44146
44147    #[simd_test(enable = "avx512f,avx512vl")]
44148    unsafe fn test_mm_maskz_mul_ps() {
44149        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44150        let b = _mm_set1_ps(2.);
44151        let r = _mm_maskz_mul_ps(0, a, b);
44152        assert_eq_m128(r, _mm_setzero_ps());
44153        let r = _mm_maskz_mul_ps(0b00001111, a, b);
44154        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44155        assert_eq_m128(r, e);
44156    }
44157
44158    #[simd_test(enable = "avx512f")]
44159    unsafe fn test_mm512_div_ps() {
44160        let a = _mm512_setr_ps(
44161            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44162        );
44163        let b = _mm512_setr_ps(
44164            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44165        );
44166        let r = _mm512_div_ps(a, b);
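        // lanes 5 and 12 divide a nonzero value by 0. and produce signed infinities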
44167        #[rustfmt::skip]
44168        let e = _mm512_setr_ps(
44169            0., 0.5, -0.5, -1.,
44170            50., f32::INFINITY, -50., -16.,
44171            0., 0.5, -0.5, 500.,
44172            f32::NEG_INFINITY, 50., -50., -16.,
44173        );
44174        assert_eq_m512(r, e);
44175    }
44176
44177    #[simd_test(enable = "avx512f")]
44178    unsafe fn test_mm512_mask_div_ps() {
44179        let a = _mm512_setr_ps(
44180            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44181        );
44182        let b = _mm512_setr_ps(
44183            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44184        );
44185        let r = _mm512_mask_div_ps(a, 0, a, b);
44186        assert_eq_m512(r, a);
44187        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44188        #[rustfmt::skip]
44189        let e = _mm512_setr_ps(
44190            0., 0.5, -0.5, -1.,
44191            50., f32::INFINITY, -50., -16.,
44192            0., 1., -1., 1000.,
44193            -131., 100., -100., -32.,
44194        );
44195        assert_eq_m512(r, e);
44196    }
44197
44198    #[simd_test(enable = "avx512f")]
44199    unsafe fn test_mm512_maskz_div_ps() {
44200        let a = _mm512_setr_ps(
44201            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44202        );
44203        let b = _mm512_setr_ps(
44204            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44205        );
44206        let r = _mm512_maskz_div_ps(0, a, b);
44207        assert_eq_m512(r, _mm512_setzero_ps());
44208        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44209        #[rustfmt::skip]
44210        let e = _mm512_setr_ps(
44211            0., 0.5, -0.5, -1.,
44212            50., f32::INFINITY, -50., -16.,
44213            0., 0., 0., 0.,
44214            0., 0., 0., 0.,
44215        );
44216        assert_eq_m512(r, e);
44217    }
44218
44219    #[simd_test(enable = "avx512f,avx512vl")]
44220    unsafe fn test_mm256_mask_div_ps() {
44221        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44222        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44223        let r = _mm256_mask_div_ps(a, 0, a, b);
44224        assert_eq_m256(r, a);
44225        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44226        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44227        assert_eq_m256(r, e);
44228    }
44229
44230    #[simd_test(enable = "avx512f,avx512vl")]
44231    unsafe fn test_mm256_maskz_div_ps() {
44232        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44233        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44234        let r = _mm256_maskz_div_ps(0, a, b);
44235        assert_eq_m256(r, _mm256_setzero_ps());
44236        let r = _mm256_maskz_div_ps(0b11111111, a, b);
44237        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44238        assert_eq_m256(r, e);
44239    }
44240
44241    #[simd_test(enable = "avx512f,avx512vl")]
44242    unsafe fn test_mm_mask_div_ps() {
44243        let a = _mm_set_ps(100., 100., -100., -32.);
44244        let b = _mm_set_ps(2., 0., 2., 2.);
44245        let r = _mm_mask_div_ps(a, 0, a, b);
44246        assert_eq_m128(r, a);
44247        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44248        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44249        assert_eq_m128(r, e);
44250    }
44251
44252    #[simd_test(enable = "avx512f,avx512vl")]
44253    unsafe fn test_mm_maskz_div_ps() {
44254        let a = _mm_set_ps(100., 100., -100., -32.);
44255        let b = _mm_set_ps(2., 0., 2., 2.);
44256        let r = _mm_maskz_div_ps(0, a, b);
44257        assert_eq_m128(r, _mm_setzero_ps());
44258        let r = _mm_maskz_div_ps(0b00001111, a, b);
44259        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44260        assert_eq_m128(r, e);
44261    }
44262
44263    #[simd_test(enable = "avx512f")]
44264    unsafe fn test_mm512_max_epi32() {
44265        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44266        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44267        let r = _mm512_max_epi32(a, b);
44268        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44269        assert_eq_m512i(r, e);
44270    }
44271
44272    #[simd_test(enable = "avx512f")]
44273    unsafe fn test_mm512_mask_max_epi32() {
44274        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44275        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44276        let r = _mm512_mask_max_epi32(a, 0, a, b);
44277        assert_eq_m512i(r, a);
44278        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44279        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44280        assert_eq_m512i(r, e);
44281    }
44282
44283    #[simd_test(enable = "avx512f")]
44284    unsafe fn test_mm512_maskz_max_epi32() {
44285        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44286        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44287        let r = _mm512_maskz_max_epi32(0, a, b);
44288        assert_eq_m512i(r, _mm512_setzero_si512());
44289        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44290        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44291        assert_eq_m512i(r, e);
44292    }
44293
44294    #[simd_test(enable = "avx512f,avx512vl")]
44295    unsafe fn test_mm256_mask_max_epi32() {
44296        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44297        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44298        let r = _mm256_mask_max_epi32(a, 0, a, b);
44299        assert_eq_m256i(r, a);
44300        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44301        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44302        assert_eq_m256i(r, e);
44303    }
44304
44305    #[simd_test(enable = "avx512f,avx512vl")]
44306    unsafe fn test_mm256_maskz_max_epi32() {
44307        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44308        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44309        let r = _mm256_maskz_max_epi32(0, a, b);
44310        assert_eq_m256i(r, _mm256_setzero_si256());
44311        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44312        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44313        assert_eq_m256i(r, e);
44314    }
44315
44316    #[simd_test(enable = "avx512f,avx512vl")]
44317    unsafe fn test_mm_mask_max_epi32() {
44318        let a = _mm_set_epi32(0, 1, 2, 3);
44319        let b = _mm_set_epi32(3, 2, 1, 0);
44320        let r = _mm_mask_max_epi32(a, 0, a, b);
44321        assert_eq_m128i(r, a);
44322        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44323        let e = _mm_set_epi32(3, 2, 2, 3);
44324        assert_eq_m128i(r, e);
44325    }
44326
44327    #[simd_test(enable = "avx512f,avx512vl")]
44328    unsafe fn test_mm_maskz_max_epi32() {
44329        let a = _mm_set_epi32(0, 1, 2, 3);
44330        let b = _mm_set_epi32(3, 2, 1, 0);
44331        let r = _mm_maskz_max_epi32(0, a, b);
44332        assert_eq_m128i(r, _mm_setzero_si128());
44333        let r = _mm_maskz_max_epi32(0b00001111, a, b);
44334        let e = _mm_set_epi32(3, 2, 2, 3);
44335        assert_eq_m128i(r, e);
44336    }
44337
44338    #[simd_test(enable = "avx512f")]
44339    unsafe fn test_mm512_max_ps() {
44340        let a = _mm512_setr_ps(
44341            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44342        );
44343        let b = _mm512_setr_ps(
44344            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44345        );
44346        let r = _mm512_max_ps(a, b);
44347        let e = _mm512_setr_ps(
44348            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44349        );
44350        assert_eq_m512(r, e);
44351    }
44352
44353    #[simd_test(enable = "avx512f")]
44354    unsafe fn test_mm512_mask_max_ps() {
44355        let a = _mm512_setr_ps(
44356            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44357        );
44358        let b = _mm512_setr_ps(
44359            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44360        );
44361        let r = _mm512_mask_max_ps(a, 0, a, b);
44362        assert_eq_m512(r, a);
44363        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44364        let e = _mm512_setr_ps(
44365            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44366        );
44367        assert_eq_m512(r, e);
44368    }
44369
44370    #[simd_test(enable = "avx512f")]
44371    unsafe fn test_mm512_maskz_max_ps() {
44372        let a = _mm512_setr_ps(
44373            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44374        );
44375        let b = _mm512_setr_ps(
44376            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44377        );
44378        let r = _mm512_maskz_max_ps(0, a, b);
44379        assert_eq_m512(r, _mm512_setzero_ps());
44380        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44381        let e = _mm512_setr_ps(
44382            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44383        );
44384        assert_eq_m512(r, e);
44385    }
44386
44387    #[simd_test(enable = "avx512f,avx512vl")]
44388    unsafe fn test_mm256_mask_max_ps() {
44389        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44390        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44391        let r = _mm256_mask_max_ps(a, 0, a, b);
44392        assert_eq_m256(r, a);
44393        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44394        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44395        assert_eq_m256(r, e);
44396    }
44397
44398    #[simd_test(enable = "avx512f,avx512vl")]
44399    unsafe fn test_mm256_maskz_max_ps() {
44400        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44401        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44402        let r = _mm256_maskz_max_ps(0, a, b);
44403        assert_eq_m256(r, _mm256_setzero_ps());
44404        let r = _mm256_maskz_max_ps(0b11111111, a, b);
44405        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44406        assert_eq_m256(r, e);
44407    }
44408
44409    #[simd_test(enable = "avx512f,avx512vl")]
44410    unsafe fn test_mm_mask_max_ps() {
44411        let a = _mm_set_ps(0., 1., 2., 3.);
44412        let b = _mm_set_ps(3., 2., 1., 0.);
44413        let r = _mm_mask_max_ps(a, 0, a, b);
44414        assert_eq_m128(r, a);
44415        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44416        let e = _mm_set_ps(3., 2., 2., 3.);
44417        assert_eq_m128(r, e);
44418    }
44419
44420    #[simd_test(enable = "avx512f,avx512vl")]
44421    unsafe fn test_mm_maskz_max_ps() {
44422        let a = _mm_set_ps(0., 1., 2., 3.);
44423        let b = _mm_set_ps(3., 2., 1., 0.);
44424        let r = _mm_maskz_max_ps(0, a, b);
44425        assert_eq_m128(r, _mm_setzero_ps());
44426        let r = _mm_maskz_max_ps(0b00001111, a, b);
44427        let e = _mm_set_ps(3., 2., 2., 3.);
44428        assert_eq_m128(r, e);
44429    }
44430
44431    #[simd_test(enable = "avx512f")]
44432    unsafe fn test_mm512_max_epu32() {
44433        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44434        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
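        // all inputs are nonnegative, so the unsigned max agrees with the signed one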
44435        let r = _mm512_max_epu32(a, b);
44436        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44437        assert_eq_m512i(r, e);
44438    }
44439
44440    #[simd_test(enable = "avx512f")]
44441    unsafe fn test_mm512_mask_max_epu32() {
44442        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44443        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44444        let r = _mm512_mask_max_epu32(a, 0, a, b);
44445        assert_eq_m512i(r, a);
44446        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44447        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44448        assert_eq_m512i(r, e);
44449    }
44450
44451    #[simd_test(enable = "avx512f")]
44452    unsafe fn test_mm512_maskz_max_epu32() {
44453        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44454        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44455        let r = _mm512_maskz_max_epu32(0, a, b);
44456        assert_eq_m512i(r, _mm512_setzero_si512());
44457        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44458        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44459        assert_eq_m512i(r, e);
44460    }
44461
44462    #[simd_test(enable = "avx512f,avx512vl")]
44463    unsafe fn test_mm256_mask_max_epu32() {
44464        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44465        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44466        let r = _mm256_mask_max_epu32(a, 0, a, b);
44467        assert_eq_m256i(r, a);
44468        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44469        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44470        assert_eq_m256i(r, e);
44471    }
44472
44473    #[simd_test(enable = "avx512f,avx512vl")]
44474    unsafe fn test_mm256_maskz_max_epu32() {
44475        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44476        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44477        let r = _mm256_maskz_max_epu32(0, a, b);
44478        assert_eq_m256i(r, _mm256_setzero_si256());
44479        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44480        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44481        assert_eq_m256i(r, e);
44482    }
44483
44484    #[simd_test(enable = "avx512f,avx512vl")]
44485    unsafe fn test_mm_mask_max_epu32() {
44486        let a = _mm_set_epi32(0, 1, 2, 3);
44487        let b = _mm_set_epi32(3, 2, 1, 0);
44488        let r = _mm_mask_max_epu32(a, 0, a, b);
44489        assert_eq_m128i(r, a);
44490        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44491        let e = _mm_set_epi32(3, 2, 2, 3);
44492        assert_eq_m128i(r, e);
44493    }
44494
44495    #[simd_test(enable = "avx512f,avx512vl")]
44496    unsafe fn test_mm_maskz_max_epu32() {
44497        let a = _mm_set_epi32(0, 1, 2, 3);
44498        let b = _mm_set_epi32(3, 2, 1, 0);
44499        let r = _mm_maskz_max_epu32(0, a, b);
44500        assert_eq_m128i(r, _mm_setzero_si128());
44501        let r = _mm_maskz_max_epu32(0b00001111, a, b);
44502        let e = _mm_set_epi32(3, 2, 2, 3);
44503        assert_eq_m128i(r, e);
44504    }
44505
44506    #[simd_test(enable = "avx512f")]
44507    unsafe fn test_mm512_min_epi32() {
44508        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44509        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44510        let r = _mm512_min_epi32(a, b);
44511        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44512        assert_eq_m512i(r, e);
44513    }
44514
44515    #[simd_test(enable = "avx512f")]
44516    unsafe fn test_mm512_mask_min_epi32() {
44517        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44518        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44519        let r = _mm512_mask_min_epi32(a, 0, a, b);
44520        assert_eq_m512i(r, a);
44521        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44522        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44523        assert_eq_m512i(r, e);
44524    }
44525
44526    #[simd_test(enable = "avx512f")]
44527    unsafe fn test_mm512_maskz_min_epi32() {
44528        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44529        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44530        let r = _mm512_maskz_min_epi32(0, a, b);
44531        assert_eq_m512i(r, _mm512_setzero_si512());
44532        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44533        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44534        assert_eq_m512i(r, e);
44535    }
44536
44537    #[simd_test(enable = "avx512f,avx512vl")]
44538    unsafe fn test_mm256_mask_min_epi32() {
44539        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44540        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44541        let r = _mm256_mask_min_epi32(a, 0, a, b);
44542        assert_eq_m256i(r, a);
44543        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44544        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44545        assert_eq_m256i(r, e);
44546    }
44547
44548    #[simd_test(enable = "avx512f,avx512vl")]
44549    unsafe fn test_mm256_maskz_min_epi32() {
44550        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44551        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44552        let r = _mm256_maskz_min_epi32(0, a, b);
44553        assert_eq_m256i(r, _mm256_setzero_si256());
44554        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44555        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44556        assert_eq_m256i(r, e);
44557    }
44558
44559    #[simd_test(enable = "avx512f,avx512vl")]
44560    unsafe fn test_mm_mask_min_epi32() {
44561        let a = _mm_set_epi32(0, 1, 2, 3);
44562        let b = _mm_set_epi32(3, 2, 1, 0);
44563        let r = _mm_mask_min_epi32(a, 0, a, b);
44564        assert_eq_m128i(r, a);
44565        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44566        let e = _mm_set_epi32(0, 1, 1, 0);
44567        assert_eq_m128i(r, e);
44568    }
44569
44570    #[simd_test(enable = "avx512f,avx512vl")]
44571    unsafe fn test_mm_maskz_min_epi32() {
44572        let a = _mm_set_epi32(0, 1, 2, 3);
44573        let b = _mm_set_epi32(3, 2, 1, 0);
44574        let r = _mm_maskz_min_epi32(0, a, b);
44575        assert_eq_m128i(r, _mm_setzero_si128());
44576        let r = _mm_maskz_min_epi32(0b00001111, a, b);
44577        let e = _mm_set_epi32(0, 1, 1, 0);
44578        assert_eq_m128i(r, e);
44579    }
44580
44581    #[simd_test(enable = "avx512f")]
44582    unsafe fn test_mm512_min_ps() {
44583        let a = _mm512_setr_ps(
44584            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44585        );
44586        let b = _mm512_setr_ps(
44587            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44588        );
44589        let r = _mm512_min_ps(a, b);
44590        let e = _mm512_setr_ps(
44591            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44592        );
44593        assert_eq_m512(r, e);
44594    }
44595
44596    #[simd_test(enable = "avx512f")]
44597    unsafe fn test_mm512_mask_min_ps() {
44598        let a = _mm512_setr_ps(
44599            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44600        );
44601        let b = _mm512_setr_ps(
44602            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44603        );
44604        let r = _mm512_mask_min_ps(a, 0, a, b);
44605        assert_eq_m512(r, a);
44606        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44607        let e = _mm512_setr_ps(
44608            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44609        );
44610        assert_eq_m512(r, e);
44611    }
44612
44613    #[simd_test(enable = "avx512f")]
44614    unsafe fn test_mm512_maskz_min_ps() {
44615        let a = _mm512_setr_ps(
44616            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44617        );
44618        let b = _mm512_setr_ps(
44619            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44620        );
44621        let r = _mm512_maskz_min_ps(0, a, b);
44622        assert_eq_m512(r, _mm512_setzero_ps());
44623        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44624        let e = _mm512_setr_ps(
44625            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44626        );
44627        assert_eq_m512(r, e);
44628    }
44629
44630    #[simd_test(enable = "avx512f,avx512vl")]
44631    unsafe fn test_mm256_mask_min_ps() {
44632        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44633        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44634        let r = _mm256_mask_min_ps(a, 0, a, b);
44635        assert_eq_m256(r, a);
44636        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44637        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44638        assert_eq_m256(r, e);
44639    }
44640
44641    #[simd_test(enable = "avx512f,avx512vl")]
44642    unsafe fn test_mm256_maskz_min_ps() {
44643        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44644        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44645        let r = _mm256_maskz_min_ps(0, a, b);
44646        assert_eq_m256(r, _mm256_setzero_ps());
44647        let r = _mm256_maskz_min_ps(0b11111111, a, b);
44648        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44649        assert_eq_m256(r, e);
44650    }
44651
44652    #[simd_test(enable = "avx512f,avx512vl")]
44653    unsafe fn test_mm_mask_min_ps() {
44654        let a = _mm_set_ps(0., 1., 2., 3.);
44655        let b = _mm_set_ps(3., 2., 1., 0.);
44656        let r = _mm_mask_min_ps(a, 0, a, b);
44657        assert_eq_m128(r, a);
44658        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44659        let e = _mm_set_ps(0., 1., 1., 0.);
44660        assert_eq_m128(r, e);
44661    }
44662
44663    #[simd_test(enable = "avx512f,avx512vl")]
44664    unsafe fn test_mm_maskz_min_ps() {
44665        let a = _mm_set_ps(0., 1., 2., 3.);
44666        let b = _mm_set_ps(3., 2., 1., 0.);
44667        let r = _mm_maskz_min_ps(0, a, b);
44668        assert_eq_m128(r, _mm_setzero_ps());
44669        let r = _mm_maskz_min_ps(0b00001111, a, b);
44670        let e = _mm_set_ps(0., 1., 1., 0.);
44671        assert_eq_m128(r, e);
44672    }
44673
44674    #[simd_test(enable = "avx512f")]
44675    unsafe fn test_mm512_min_epu32() {
44676        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44677        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44678        let r = _mm512_min_epu32(a, b);
44679        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44680        assert_eq_m512i(r, e);
44681    }
44682
44683    #[simd_test(enable = "avx512f")]
44684    unsafe fn test_mm512_mask_min_epu32() {
44685        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44686        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44687        let r = _mm512_mask_min_epu32(a, 0, a, b);
44688        assert_eq_m512i(r, a);
44689        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44690        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44691        assert_eq_m512i(r, e);
44692    }
44693
44694    #[simd_test(enable = "avx512f")]
44695    unsafe fn test_mm512_maskz_min_epu32() {
44696        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44697        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44698        let r = _mm512_maskz_min_epu32(0, a, b);
44699        assert_eq_m512i(r, _mm512_setzero_si512());
44700        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44701        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44702        assert_eq_m512i(r, e);
44703    }
44704
44705    #[simd_test(enable = "avx512f,avx512vl")]
44706    unsafe fn test_mm256_mask_min_epu32() {
44707        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44708        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44709        let r = _mm256_mask_min_epu32(a, 0, a, b);
44710        assert_eq_m256i(r, a);
44711        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44712        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44713        assert_eq_m256i(r, e);
44714    }
44715
44716    #[simd_test(enable = "avx512f,avx512vl")]
44717    unsafe fn test_mm256_maskz_min_epu32() {
44718        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44719        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44720        let r = _mm256_maskz_min_epu32(0, a, b);
44721        assert_eq_m256i(r, _mm256_setzero_si256());
44722        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44723        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44724        assert_eq_m256i(r, e);
44725    }
44726
44727    #[simd_test(enable = "avx512f,avx512vl")]
44728    unsafe fn test_mm_mask_min_epu32() {
44729        let a = _mm_set_epi32(0, 1, 2, 3);
44730        let b = _mm_set_epi32(3, 2, 1, 0);
44731        let r = _mm_mask_min_epu32(a, 0, a, b);
44732        assert_eq_m128i(r, a);
44733        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44734        let e = _mm_set_epi32(0, 1, 1, 0);
44735        assert_eq_m128i(r, e);
44736    }
44737
44738    #[simd_test(enable = "avx512f,avx512vl")]
44739    unsafe fn test_mm_maskz_min_epu32() {
44740        let a = _mm_set_epi32(0, 1, 2, 3);
44741        let b = _mm_set_epi32(3, 2, 1, 0);
44742        let r = _mm_maskz_min_epu32(0, a, b);
44743        assert_eq_m128i(r, _mm_setzero_si128());
44744        let r = _mm_maskz_min_epu32(0b00001111, a, b);
44745        let e = _mm_set_epi32(0, 1, 1, 0);
44746        assert_eq_m128i(r, e);
44747    }
44748
44749    #[simd_test(enable = "avx512f")]
44750    unsafe fn test_mm512_sqrt_ps() {
44751        let a = _mm512_setr_ps(
44752            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44753        );
44754        let r = _mm512_sqrt_ps(a);
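        // the inputs are perfect squares, so every result is exact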
44755        let e = _mm512_setr_ps(
44756            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44757        );
44758        assert_eq_m512(r, e);
44759    }
44760
44761    #[simd_test(enable = "avx512f")]
44762    unsafe fn test_mm512_mask_sqrt_ps() {
44763        let a = _mm512_setr_ps(
44764            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44765        );
44766        let r = _mm512_mask_sqrt_ps(a, 0, a);
44767        assert_eq_m512(r, a);
44768        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44769        let e = _mm512_setr_ps(
44770            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44771        );
44772        assert_eq_m512(r, e);
44773    }
44774
44775    #[simd_test(enable = "avx512f")]
44776    unsafe fn test_mm512_maskz_sqrt_ps() {
44777        let a = _mm512_setr_ps(
44778            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44779        );
44780        let r = _mm512_maskz_sqrt_ps(0, a);
44781        assert_eq_m512(r, _mm512_setzero_ps());
44782        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44783        let e = _mm512_setr_ps(
44784            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44785        );
44786        assert_eq_m512(r, e);
44787    }
44788
44789    #[simd_test(enable = "avx512f,avx512vl")]
44790    unsafe fn test_mm256_mask_sqrt_ps() {
44791        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44792        let r = _mm256_mask_sqrt_ps(a, 0, a);
44793        assert_eq_m256(r, a);
44794        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44795        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44796        assert_eq_m256(r, e);
44797    }
44798
44799    #[simd_test(enable = "avx512f,avx512vl")]
44800    unsafe fn test_mm256_maskz_sqrt_ps() {
44801        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44802        let r = _mm256_maskz_sqrt_ps(0, a);
44803        assert_eq_m256(r, _mm256_setzero_ps());
44804        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44805        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44806        assert_eq_m256(r, e);
44807    }
44808
44809    #[simd_test(enable = "avx512f,avx512vl")]
44810    unsafe fn test_mm_mask_sqrt_ps() {
44811        let a = _mm_set_ps(0., 1., 4., 9.);
44812        let r = _mm_mask_sqrt_ps(a, 0, a);
44813        assert_eq_m128(r, a);
44814        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44815        let e = _mm_set_ps(0., 1., 2., 3.);
44816        assert_eq_m128(r, e);
44817    }
44818
44819    #[simd_test(enable = "avx512f,avx512vl")]
44820    unsafe fn test_mm_maskz_sqrt_ps() {
44821        let a = _mm_set_ps(0., 1., 4., 9.);
44822        let r = _mm_maskz_sqrt_ps(0, a);
44823        assert_eq_m128(r, _mm_setzero_ps());
44824        let r = _mm_maskz_sqrt_ps(0b00001111, a);
44825        let e = _mm_set_ps(0., 1., 2., 3.);
44826        assert_eq_m128(r, e);
44827    }
44828
44829    #[simd_test(enable = "avx512f")]
44830    unsafe fn test_mm512_fmadd_ps() {
44831        let a = _mm512_set1_ps(1.);
44832        let b = _mm512_setr_ps(
44833            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44834        );
44835        let c = _mm512_set1_ps(1.);
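        // fused multiply-add: r[i] = a[i] * b[i] + c[i]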
44836        let r = _mm512_fmadd_ps(a, b, c);
44837        let e = _mm512_setr_ps(
44838            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44839        );
44840        assert_eq_m512(r, e);
44841    }
44842
44843    #[simd_test(enable = "avx512f")]
44844    unsafe fn test_mm512_mask_fmadd_ps() {
44845        let a = _mm512_set1_ps(1.);
44846        let b = _mm512_setr_ps(
44847            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44848        );
44849        let c = _mm512_set1_ps(1.);
44850        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44851        assert_eq_m512(r, a);
44852        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44853        let e = _mm512_setr_ps(
44854            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44855        );
44856        assert_eq_m512(r, e);
44857    }
44858
44859    #[simd_test(enable = "avx512f")]
44860    unsafe fn test_mm512_maskz_fmadd_ps() {
44861        let a = _mm512_set1_ps(1.);
44862        let b = _mm512_setr_ps(
44863            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44864        );
44865        let c = _mm512_set1_ps(1.);
44866        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44867        assert_eq_m512(r, _mm512_setzero_ps());
44868        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44869        let e = _mm512_setr_ps(
44870            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44871        );
44872        assert_eq_m512(r, e);
44873    }
44874
44875    #[simd_test(enable = "avx512f")]
44876    unsafe fn test_mm512_mask3_fmadd_ps() {
44877        let a = _mm512_set1_ps(1.);
44878        let b = _mm512_setr_ps(
44879            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44880        );
44881        let c = _mm512_set1_ps(2.);
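        // mask3 variants copy from c (not a) where the mask bit is clear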
44882        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44883        assert_eq_m512(r, c);
44884        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44885        let e = _mm512_setr_ps(
44886            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44887        );
44888        assert_eq_m512(r, e);
44889    }
44890
44891    #[simd_test(enable = "avx512f,avx512vl")]
44892    unsafe fn test_mm256_mask_fmadd_ps() {
44893        let a = _mm256_set1_ps(1.);
44894        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44895        let c = _mm256_set1_ps(1.);
44896        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44897        assert_eq_m256(r, a);
44898        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44899        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44900        assert_eq_m256(r, e);
44901    }
44902
44903    #[simd_test(enable = "avx512f,avx512vl")]
44904    unsafe fn test_mm256_maskz_fmadd_ps() {
44905        let a = _mm256_set1_ps(1.);
44906        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44907        let c = _mm256_set1_ps(1.);
44908        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
44909        assert_eq_m256(r, _mm256_setzero_ps());
44910        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
44911        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44912        assert_eq_m256(r, e);
44913    }
44914
44915    #[simd_test(enable = "avx512f,avx512vl")]
44916    unsafe fn test_mm256_mask3_fmadd_ps() {
44917        let a = _mm256_set1_ps(1.);
44918        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44919        let c = _mm256_set1_ps(1.);
44920        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
44921        assert_eq_m256(r, c);
44922        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
44923        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44924        assert_eq_m256(r, e);
44925    }
44926
44927    #[simd_test(enable = "avx512f,avx512vl")]
44928    unsafe fn test_mm_mask_fmadd_ps() {
44929        let a = _mm_set1_ps(1.);
44930        let b = _mm_set_ps(0., 1., 2., 3.);
44931        let c = _mm_set1_ps(1.);
44932        let r = _mm_mask_fmadd_ps(a, 0, b, c);
44933        assert_eq_m128(r, a);
44934        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
44935        let e = _mm_set_ps(1., 2., 3., 4.);
44936        assert_eq_m128(r, e);
44937    }
44938
44939    #[simd_test(enable = "avx512f,avx512vl")]
44940    unsafe fn test_mm_maskz_fmadd_ps() {
44941        let a = _mm_set1_ps(1.);
44942        let b = _mm_set_ps(0., 1., 2., 3.);
44943        let c = _mm_set1_ps(1.);
44944        let r = _mm_maskz_fmadd_ps(0, a, b, c);
44945        assert_eq_m128(r, _mm_setzero_ps());
44946        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
44947        let e = _mm_set_ps(1., 2., 3., 4.);
44948        assert_eq_m128(r, e);
44949    }
44950
44951    #[simd_test(enable = "avx512f,avx512vl")]
44952    unsafe fn test_mm_mask3_fmadd_ps() {
44953        let a = _mm_set1_ps(1.);
44954        let b = _mm_set_ps(0., 1., 2., 3.);
44955        let c = _mm_set1_ps(1.);
44956        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
44957        assert_eq_m128(r, c);
44958        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
44959        let e = _mm_set_ps(1., 2., 3., 4.);
44960        assert_eq_m128(r, e);
44961    }
44962
44963    #[simd_test(enable = "avx512f")]
44964    unsafe fn test_mm512_fmsub_ps() {
44965        let a = _mm512_set1_ps(1.);
44968        let b = _mm512_setr_ps(
44969            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44970        );
44971        let c = _mm512_set1_ps(1.);
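        // fused multiply-subtract: r[i] = a[i] * b[i] - c[i]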
44974        let r = _mm512_fmsub_ps(a, b, c);
44975        let e = _mm512_setr_ps(
44976            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
44977        );
44978        assert_eq_m512(r, e);
44979    }
44980
44981    #[simd_test(enable = "avx512f")]
44982    unsafe fn test_mm512_mask_fmsub_ps() {
44983        let a = _mm512_set1_ps(1.);
44984        let b = _mm512_setr_ps(
44985            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44986        );
44987        let c = _mm512_set1_ps(1.);
44988        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
44989        assert_eq_m512(r, a);
44990        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
44991        let e = _mm512_setr_ps(
44992            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
44993        );
44994        assert_eq_m512(r, e);
44995    }
44996
44997    #[simd_test(enable = "avx512f")]
44998    unsafe fn test_mm512_maskz_fmsub_ps() {
44999        let a = _mm512_set1_ps(1.);
45000        let b = _mm512_setr_ps(
45001            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45002        );
45003        let c = _mm512_set1_ps(1.);
45004        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45005        assert_eq_m512(r, _mm512_setzero_ps());
45006        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45007        let e = _mm512_setr_ps(
45008            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45009        );
45010        assert_eq_m512(r, e);
45011    }
45012
45013    #[simd_test(enable = "avx512f")]
45014    unsafe fn test_mm512_mask3_fmsub_ps() {
45015        let a = _mm512_set1_ps(1.);
45016        let b = _mm512_setr_ps(
45017            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45018        );
45019        let c = _mm512_setr_ps(
45020            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45021        );
45022        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45023        assert_eq_m512(r, c);
45024        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45025        let e = _mm512_setr_ps(
45026            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45027        );
45028        assert_eq_m512(r, e);
45029    }
45030
45031    #[simd_test(enable = "avx512f,avx512vl")]
45032    unsafe fn test_mm256_mask_fmsub_ps() {
45033        let a = _mm256_set1_ps(1.);
45034        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45035        let c = _mm256_set1_ps(1.);
45036        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45037        assert_eq_m256(r, a);
45038        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45039        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45040        assert_eq_m256(r, e);
45041    }
45042
45043    #[simd_test(enable = "avx512f,avx512vl")]
45044    unsafe fn test_mm256_maskz_fmsub_ps() {
45045        let a = _mm256_set1_ps(1.);
45046        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45047        let c = _mm256_set1_ps(1.);
45048        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45049        assert_eq_m256(r, _mm256_setzero_ps());
45050        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45051        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45052        assert_eq_m256(r, e);
45053    }
45054
45055    #[simd_test(enable = "avx512f,avx512vl")]
45056    unsafe fn test_mm256_mask3_fmsub_ps() {
45057        let a = _mm256_set1_ps(1.);
45058        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45059        let c = _mm256_set1_ps(1.);
45060        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45061        assert_eq_m256(r, c);
45062        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45063        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45064        assert_eq_m256(r, e);
45065    }
45066
45067    #[simd_test(enable = "avx512f,avx512vl")]
45068    unsafe fn test_mm_mask_fmsub_ps() {
45069        let a = _mm_set1_ps(1.);
45070        let b = _mm_set_ps(0., 1., 2., 3.);
45071        let c = _mm_set1_ps(1.);
45072        let r = _mm_mask_fmsub_ps(a, 0, b, c);
45073        assert_eq_m128(r, a);
45074        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45075        let e = _mm_set_ps(-1., 0., 1., 2.);
45076        assert_eq_m128(r, e);
45077    }
45078
45079    #[simd_test(enable = "avx512f,avx512vl")]
45080    unsafe fn test_mm_maskz_fmsub_ps() {
45081        let a = _mm_set1_ps(1.);
45082        let b = _mm_set_ps(0., 1., 2., 3.);
45083        let c = _mm_set1_ps(1.);
45084        let r = _mm_maskz_fmsub_ps(0, a, b, c);
45085        assert_eq_m128(r, _mm_setzero_ps());
45086        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45087        let e = _mm_set_ps(-1., 0., 1., 2.);
45088        assert_eq_m128(r, e);
45089    }
45090
45091    #[simd_test(enable = "avx512f,avx512vl")]
45092    unsafe fn test_mm_mask3_fmsub_ps() {
45093        let a = _mm_set1_ps(1.);
45094        let b = _mm_set_ps(0., 1., 2., 3.);
45095        let c = _mm_set1_ps(1.);
45096        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45097        assert_eq_m128(r, c);
45098        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45099        let e = _mm_set_ps(-1., 0., 1., 2.);
45100        assert_eq_m128(r, e);
45101    }
45102
45103    #[simd_test(enable = "avx512f")]
45104    unsafe fn test_mm512_fmaddsub_ps() {
45105        let a = _mm512_set1_ps(1.);
45106        let b = _mm512_setr_ps(
45107            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45108        );
45109        let c = _mm512_set1_ps(1.);
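        // fmaddsub alternates per lane: even lanes compute a*b - c, odd lanes a*b + c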
45110        let r = _mm512_fmaddsub_ps(a, b, c);
45111        let e = _mm512_setr_ps(
45112            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45113        );
45114        assert_eq_m512(r, e);
45115    }
45116
45117    #[simd_test(enable = "avx512f")]
45118    unsafe fn test_mm512_mask_fmaddsub_ps() {
45119        let a = _mm512_set1_ps(1.);
45120        let b = _mm512_setr_ps(
45121            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45122        );
45123        let c = _mm512_set1_ps(1.);
45124        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45125        assert_eq_m512(r, a);
45126        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45127        let e = _mm512_setr_ps(
45128            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45129        );
45130        assert_eq_m512(r, e);
45131    }
45132
45133    #[simd_test(enable = "avx512f")]
45134    unsafe fn test_mm512_maskz_fmaddsub_ps() {
45135        let a = _mm512_set1_ps(1.);
45136        let b = _mm512_setr_ps(
45137            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45138        );
45139        let c = _mm512_set1_ps(1.);
45140        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45141        assert_eq_m512(r, _mm512_setzero_ps());
45142        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45143        let e = _mm512_setr_ps(
45144            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45145        );
45146        assert_eq_m512(r, e);
45147    }
45148
45149    #[simd_test(enable = "avx512f")]
45150    unsafe fn test_mm512_mask3_fmaddsub_ps() {
45151        let a = _mm512_set1_ps(1.);
45152        let b = _mm512_setr_ps(
45153            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45154        );
45155        let c = _mm512_setr_ps(
45156            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45157        );
45158        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45159        assert_eq_m512(r, c);
45160        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45161        let e = _mm512_setr_ps(
45162            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45163        );
45164        assert_eq_m512(r, e);
45165    }
45166
45167    #[simd_test(enable = "avx512f,avx512vl")]
45168    unsafe fn test_mm256_mask_fmaddsub_ps() {
45169        let a = _mm256_set1_ps(1.);
45170        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45171        let c = _mm256_set1_ps(1.);
45172        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45173        assert_eq_m256(r, a);
45174        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45175        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45176        assert_eq_m256(r, e);
45177    }
45178
45179    #[simd_test(enable = "avx512f,avx512vl")]
45180    unsafe fn test_mm256_maskz_fmaddsub_ps() {
45181        let a = _mm256_set1_ps(1.);
45182        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45183        let c = _mm256_set1_ps(1.);
45184        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45185        assert_eq_m256(r, _mm256_setzero_ps());
45186        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45187        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45188        assert_eq_m256(r, e);
45189    }
45190
45191    #[simd_test(enable = "avx512f,avx512vl")]
45192    unsafe fn test_mm256_mask3_fmaddsub_ps() {
45193        let a = _mm256_set1_ps(1.);
45194        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45195        let c = _mm256_set1_ps(1.);
45196        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45197        assert_eq_m256(r, c);
45198        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45199        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45200        assert_eq_m256(r, e);
45201    }
45202
45203    #[simd_test(enable = "avx512f,avx512vl")]
45204    unsafe fn test_mm_mask_fmaddsub_ps() {
45205        let a = _mm_set1_ps(1.);
45206        let b = _mm_set_ps(0., 1., 2., 3.);
45207        let c = _mm_set1_ps(1.);
45208        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45209        assert_eq_m128(r, a);
45210        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45211        let e = _mm_set_ps(1., 0., 3., 2.);
45212        assert_eq_m128(r, e);
45213    }
45214
45215    #[simd_test(enable = "avx512f,avx512vl")]
45216    unsafe fn test_mm_maskz_fmaddsub_ps() {
45217        let a = _mm_set1_ps(1.);
45218        let b = _mm_set_ps(0., 1., 2., 3.);
45219        let c = _mm_set1_ps(1.);
45220        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45221        assert_eq_m128(r, _mm_setzero_ps());
45222        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45223        let e = _mm_set_ps(1., 0., 3., 2.);
45224        assert_eq_m128(r, e);
45225    }
45226
45227    #[simd_test(enable = "avx512f,avx512vl")]
45228    unsafe fn test_mm_mask3_fmaddsub_ps() {
45229        let a = _mm_set1_ps(1.);
45230        let b = _mm_set_ps(0., 1., 2., 3.);
45231        let c = _mm_set1_ps(1.);
45232        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45233        assert_eq_m128(r, c);
45234        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45235        let e = _mm_set_ps(1., 0., 3., 2.);
45236        assert_eq_m128(r, e);
45237    }
45238
45239    #[simd_test(enable = "avx512f")]
45240    unsafe fn test_mm512_fmsubadd_ps() {
45241        let a = _mm512_set1_ps(1.);
45244        let b = _mm512_setr_ps(
45245            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45246        );
45247        let c = _mm512_set1_ps(1.);
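        // fmsubadd is the mirror image: even lanes compute a*b + c, odd lanes a*b - c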
45250        let r = _mm512_fmsubadd_ps(a, b, c);
45251        let e = _mm512_setr_ps(
45252            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45253        );
45254        assert_eq_m512(r, e);
45255    }
45256
45257    #[simd_test(enable = "avx512f")]
45258    unsafe fn test_mm512_mask_fmsubadd_ps() {
45259        let a = _mm512_set1_ps(1.);
45260        let b = _mm512_setr_ps(
45261            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45262        );
45263        let c = _mm512_set1_ps(1.);
45264        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45265        assert_eq_m512(r, a);
45266        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45267        let e = _mm512_setr_ps(
45268            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45269        );
45270        assert_eq_m512(r, e);
45271    }
45272
45273    #[simd_test(enable = "avx512f")]
45274    unsafe fn test_mm512_maskz_fmsubadd_ps() {
45275        let a = _mm512_set1_ps(1.);
45276        let b = _mm512_setr_ps(
45277            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45278        );
45279        let c = _mm512_set1_ps(1.);
45280        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45281        assert_eq_m512(r, _mm512_setzero_ps());
45282        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45283        let e = _mm512_setr_ps(
45284            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45285        );
45286        assert_eq_m512(r, e);
45287    }
45288
45289    #[simd_test(enable = "avx512f")]
45290    unsafe fn test_mm512_mask3_fmsubadd_ps() {
45291        let a = _mm512_set1_ps(1.);
45292        let b = _mm512_setr_ps(
45293            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45294        );
45295        let c = _mm512_setr_ps(
45296            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45297        );
45298        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45299        assert_eq_m512(r, c);
45300        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45301        let e = _mm512_setr_ps(
45302            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45303        );
45304        assert_eq_m512(r, e);
45305    }
45306
45307    #[simd_test(enable = "avx512f,avx512vl")]
45308    unsafe fn test_mm256_mask_fmsubadd_ps() {
45309        let a = _mm256_set1_ps(1.);
45310        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45311        let c = _mm256_set1_ps(1.);
45312        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45313        assert_eq_m256(r, a);
45314        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45315        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45316        assert_eq_m256(r, e);
45317    }
45318
45319    #[simd_test(enable = "avx512f,avx512vl")]
45320    unsafe fn test_mm256_maskz_fmsubadd_ps() {
45321        let a = _mm256_set1_ps(1.);
45322        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45323        let c = _mm256_set1_ps(1.);
45324        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45325        assert_eq_m256(r, _mm256_setzero_ps());
45326        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45327        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45328        assert_eq_m256(r, e);
45329    }
45330
45331    #[simd_test(enable = "avx512f,avx512vl")]
45332    unsafe fn test_mm256_mask3_fmsubadd_ps() {
45333        let a = _mm256_set1_ps(1.);
45334        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45335        let c = _mm256_set1_ps(1.);
45336        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45337        assert_eq_m256(r, c);
45338        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45339        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45340        assert_eq_m256(r, e);
45341    }
45342
45343    #[simd_test(enable = "avx512f,avx512vl")]
45344    unsafe fn test_mm_mask_fmsubadd_ps() {
45345        let a = _mm_set1_ps(1.);
45346        let b = _mm_set_ps(0., 1., 2., 3.);
45347        let c = _mm_set1_ps(1.);
45348        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45349        assert_eq_m128(r, a);
45350        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45351        let e = _mm_set_ps(-1., 2., 1., 4.);
45352        assert_eq_m128(r, e);
45353    }
45354
45355    #[simd_test(enable = "avx512f,avx512vl")]
45356    unsafe fn test_mm_maskz_fmsubadd_ps() {
45357        let a = _mm_set1_ps(1.);
45358        let b = _mm_set_ps(0., 1., 2., 3.);
45359        let c = _mm_set1_ps(1.);
45360        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45361        assert_eq_m128(r, _mm_setzero_ps());
45362        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45363        let e = _mm_set_ps(-1., 2., 1., 4.);
45364        assert_eq_m128(r, e);
45365    }
45366
45367    #[simd_test(enable = "avx512f,avx512vl")]
45368    unsafe fn test_mm_mask3_fmsubadd_ps() {
45369        let a = _mm_set1_ps(1.);
45370        let b = _mm_set_ps(0., 1., 2., 3.);
45371        let c = _mm_set1_ps(1.);
45372        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45373        assert_eq_m128(r, c);
45374        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45375        let e = _mm_set_ps(-1., 2., 1., 4.);
45376        assert_eq_m128(r, e);
45377    }
45378
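    // vfnmadd negates the product: each lane computes -(a*b) + c. With a = 1
    // and c = 1 the expected lanes below are simply 1 - b.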
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask3_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask3_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

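    // vfnmsub negates both terms: each lane computes -(a*b) - c, i.e. -b - 1
    // for the a = c = 1 inputs used below.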
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask3_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask3_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

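    // vrcp14ps returns a reciprocal approximation with a relative error of at
    // most 2^-14, so 1/3 comes back as 0.33333206 rather than the correctly
    // rounded 0.33333334. The tests assert the exact approximation the
    // instruction produces.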
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rcp14_ps(a);
        let e = _mm512_set1_ps(0.33333206);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rcp14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rcp14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_rcp14_ps(a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_rcp14_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_rcp14_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_rcp14_ps(a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_rcp14_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_rcp14_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_rcp14_ps(0b00001111, a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

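    // vrsqrt14ps approximates 1/sqrt(x) to the same 2^-14 relative accuracy;
    // 1/sqrt(3) ~= 0.57735 is returned as 0.5773392.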
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rsqrt14_ps(a);
        let e = _mm512_set1_ps(0.5773392);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rsqrt14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_rsqrt14_ps(a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_rsqrt14_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_rsqrt14_ps(a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_rsqrt14_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

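    // vgetexpps extracts the unbiased exponent of each lane as a float,
    // i.e. floor(log2(|x|)); for 3.0 = 1.5 * 2^1 that is 1.0.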
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_getexp_ps(a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_getexp_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_getexp_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_getexp_ps(0b11111111, a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_getexp_ps(a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_getexp_ps(0b00001111, a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

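    // vrndscaleps rounds to 2^-M precision, where M is IMM8[7:4]; the low bits
    // select the rounding mode. IMM8 = 0 therefore means round to the nearest
    // integer (round-to-nearest-even, zero fraction bits kept), turning 1.1
    // into 1.0.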
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm256_set1_ps(1.1);
        assert_eq_m256(r, e);
        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm_set1_ps(1.1);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

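    // vscalefps computes a * 2^floor(b) per lane: 1.0 * 2^3 = 8.0.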
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_ps(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_scalef_ps(a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_mask_scalef_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_maskz_scalef_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_ps(a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_scalef_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

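    // vfixupimmps classifies each lane and patches special values according to
    // the 4-bit entries of the per-lane table in `c`; with the table and IMM8
    // used here, the NaN inputs are fixed up to +0.0.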
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fixupimm_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_ps::<5>(a, b, c);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fixupimm_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fixupimm_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_fixupimm_ps::<5>(a, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ps::<5>(a, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

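    // vpternlogd treats IMM8 as a 3-input truth table: result bit i of each
    // lane is IMM8 bit (a_i << 2 | b_i << 1 | c_i). 0b1111_0000 reproduces A,
    // 0b1001_0110 is three-way XOR, and 0b1110_1000 is the majority function.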
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m512i(r, a);

        // Bitwise xor.
        let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m512i(r, e);
        assert_eq_m512i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_ternarylogic_epi32() {
        let src = _mm512_set1_epi32(1 << 2);
        let a = _mm512_set1_epi32(1 << 1);
        let b = _mm512_set1_epi32(1 << 0);
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);

        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m256i(r, a);

        // Bitwise xor.
        let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m256i(r, e);
        assert_eq_m256i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_ternarylogic_epi32() {
        let src = _mm256_set1_epi32(1 << 2);
        let a = _mm256_set1_epi32(1 << 1);
        let b = _mm256_set1_epi32(1 << 0);
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
        let a = _mm256_set1_epi32(1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let c = _mm256_set1_epi32(1 << 0);
        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m128i(r, a);

        // Bitwise xor.
        let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m128i(r, e);
        assert_eq_m128i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_ternarylogic_epi32() {
        let src = _mm_set1_epi32(1 << 2);
        let a = _mm_set1_epi32(1 << 1);
        let b = _mm_set1_epi32(1 << 0);
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_ternarylogic_epi32() {
        let a = _mm_set1_epi32(1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let c = _mm_set1_epi32(1 << 0);
        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }

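    // vgetmantps extracts the mantissa normalized into the requested interval:
    // 10.0 = 1.25 * 2^3, so both the [0.75, 1.5) and [1, 2) normalizations
    // return 1.25.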
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_getmant_ps() {
        let a = _mm256_set1_ps(10.);
        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
        let e = _mm256_set1_ps(1.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_getmant_ps() {
        let a = _mm_set1_ps(10.);
        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_getmant_ps() {
        let a = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_getmant_ps() {
        let a = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
        let e = _mm_set1_ps(1.25);
        assert_eq_m128(r, e);
    }

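    // The *_round_ps tests exercise static rounding with exceptions suppressed
    // (SAE). Results that are exact in every mode (e.g. 1.5 + -1.0) do not
    // change; the 0.00000007 lane is chosen so that round-to-nearest and
    // round-toward-zero differ in the last ulp (-0.99999994 vs. -0.9999999).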
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r =
            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

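    // 0.1 has no exact f32 representation, so most products here are inexact
    // and the nearest/toward-zero modes disagree in the final ulp (e.g. 0.15
    // vs. 0.14999999).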
46590    #[simd_test(enable = "avx512f")]
46591    unsafe fn test_mm512_mul_round_ps() {
46592        #[rustfmt::skip]
46593        let a = _mm512_setr_ps(
46594            0., 1.5, 2., 3.5,
46595            4., 5.5, 6., 7.5,
46596            8., 9.5, 10., 11.5,
46597            12., 13.5, 14., 0.00000000000000000000007,
46598        );
46599        let b = _mm512_set1_ps(0.1);
46600        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46601        #[rustfmt::skip]
46602        let e = _mm512_setr_ps(
46603            0., 0.15, 0.2, 0.35,
46604            0.4, 0.55, 0.6, 0.75,
46605            0.8, 0.95, 1.0, 1.15,
46606            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46607        );
46608        assert_eq_m512(r, e);
46609        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46610        #[rustfmt::skip]
46611        let e = _mm512_setr_ps(
46612            0., 0.14999999, 0.2, 0.35,
46613            0.4, 0.54999995, 0.59999996, 0.75,
46614            0.8, 0.95, 1.0, 1.15,
46615            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46616        );
46617        assert_eq_m512(r, e);
46618    }
46619
46620    #[simd_test(enable = "avx512f")]
46621    unsafe fn test_mm512_mask_mul_round_ps() {
46622        #[rustfmt::skip]
46623        let a = _mm512_setr_ps(
46624            0., 1.5, 2., 3.5,
46625            4., 5.5, 6., 7.5,
46626            8., 9.5, 10., 11.5,
46627            12., 13.5, 14., 0.00000000000000000000007,
46628        );
46629        let b = _mm512_set1_ps(0.1);
46630        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46631            a, 0, a, b,
46632        );
46633        assert_eq_m512(r, a);
46634        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46635            a,
46636            0b11111111_00000000,
46637            a,
46638            b,
46639        );
46640        #[rustfmt::skip]
46641        let e = _mm512_setr_ps(
46642            0., 1.5, 2., 3.5,
46643            4., 5.5, 6., 7.5,
46644            0.8, 0.95, 1.0, 1.15,
46645            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46646        );
46647        assert_eq_m512(r, e);
46648    }
46649
46650    #[simd_test(enable = "avx512f")]
46651    unsafe fn test_mm512_maskz_mul_round_ps() {
46652        #[rustfmt::skip]
46653        let a = _mm512_setr_ps(
46654            0., 1.5, 2., 3.5,
46655            4., 5.5, 6., 7.5,
46656            8., 9.5, 10., 11.5,
46657            12., 13.5, 14., 0.00000000000000000000007,
46658        );
46659        let b = _mm512_set1_ps(0.1);
46660        let r =
46661            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46662        assert_eq_m512(r, _mm512_setzero_ps());
46663        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46664            0b11111111_00000000,
46665            a,
46666            b,
46667        );
46668        #[rustfmt::skip]
46669        let e = _mm512_setr_ps(
46670            0., 0., 0., 0.,
46671            0., 0., 0., 0.,
46672            0.8, 0.95, 1.0, 1.15,
46673            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46674        );
46675        assert_eq_m512(r, e);
46676    }
46677
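    // The exact quotient 1/3 lies between two f32s: round-to-nearest picks
    // 0.33333334, truncation toward zero picks 0.3333333.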
46678    #[simd_test(enable = "avx512f")]
46679    unsafe fn test_mm512_div_round_ps() {
46680        let a = _mm512_set1_ps(1.);
46681        let b = _mm512_set1_ps(3.);
46682        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46683        let e = _mm512_set1_ps(0.33333334);
46684        assert_eq_m512(r, e);
46685        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46686        let e = _mm512_set1_ps(0.3333333);
46687        assert_eq_m512(r, e);
46688    }
46689
46690    #[simd_test(enable = "avx512f")]
46691    unsafe fn test_mm512_mask_div_round_ps() {
46692        let a = _mm512_set1_ps(1.);
46693        let b = _mm512_set1_ps(3.);
46694        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46695            a, 0, a, b,
46696        );
46697        assert_eq_m512(r, a);
46698        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46699            a,
46700            0b11111111_00000000,
46701            a,
46702            b,
46703        );
46704        let e = _mm512_setr_ps(
46705            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46706            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46707        );
46708        assert_eq_m512(r, e);
46709    }
46710
46711    #[simd_test(enable = "avx512f")]
46712    unsafe fn test_mm512_maskz_div_round_ps() {
46713        let a = _mm512_set1_ps(1.);
46714        let b = _mm512_set1_ps(3.);
46715        let r =
46716            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46717        assert_eq_m512(r, _mm512_setzero_ps());
46718        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46719            0b11111111_00000000,
46720            a,
46721            b,
46722        );
46723        let e = _mm512_setr_ps(
46724            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46725            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46726        );
46727        assert_eq_m512(r, e);
46728    }
46729
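    // sqrt(3) lies between the f32s 1.7320508 and 1.7320509; round-to-nearest
    // selects the former, rounding toward +infinity the latter.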
46730    #[simd_test(enable = "avx512f")]
46731    unsafe fn test_mm512_sqrt_round_ps() {
46732        let a = _mm512_set1_ps(3.);
46733        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46734        let e = _mm512_set1_ps(1.7320508);
46735        assert_eq_m512(r, e);
46736        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46737        let e = _mm512_set1_ps(1.7320509);
46738        assert_eq_m512(r, e);
46739    }
46740
46741    #[simd_test(enable = "avx512f")]
46742    unsafe fn test_mm512_mask_sqrt_round_ps() {
46743        let a = _mm512_set1_ps(3.);
46744        let r =
46745            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46746        assert_eq_m512(r, a);
46747        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46748            a,
46749            0b11111111_00000000,
46750            a,
46751        );
46752        let e = _mm512_setr_ps(
46753            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46754            1.7320508, 1.7320508, 1.7320508,
46755        );
46756        assert_eq_m512(r, e);
46757    }
46758
46759    #[simd_test(enable = "avx512f")]
46760    unsafe fn test_mm512_maskz_sqrt_round_ps() {
46761        let a = _mm512_set1_ps(3.);
46762        let r =
46763            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46764        assert_eq_m512(r, _mm512_setzero_ps());
46765        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46766            0b11111111_00000000,
46767            a,
46768        );
46769        let e = _mm512_setr_ps(
46770            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46771            1.7320508, 1.7320508, 1.7320508,
46772        );
46773        assert_eq_m512(r, e);
46774    }
46775
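    // An FMA applies a single rounding to the exact a * b + c, so
    // 0.00000007 * 1.0 + (-1.0) rounds straight to -0.99999994 (nearest) or
    // -0.9999999 (toward zero), matching the `sub_round` results above.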
46776    #[simd_test(enable = "avx512f")]
46777    unsafe fn test_mm512_fmadd_round_ps() {
46778        let a = _mm512_set1_ps(0.00000007);
46779        let b = _mm512_set1_ps(1.);
46780        let c = _mm512_set1_ps(-1.);
46781        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46782        let e = _mm512_set1_ps(-0.99999994);
46783        assert_eq_m512(r, e);
46784        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46785        let e = _mm512_set1_ps(-0.9999999);
46786        assert_eq_m512(r, e);
46787    }
46788
46789    #[simd_test(enable = "avx512f")]
46790    unsafe fn test_mm512_mask_fmadd_round_ps() {
46791        let a = _mm512_set1_ps(0.00000007);
46792        let b = _mm512_set1_ps(1.);
46793        let c = _mm512_set1_ps(-1.);
46794        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46795            a, 0, b, c,
46796        );
46797        assert_eq_m512(r, a);
46798        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46799            a,
46800            0b00000000_11111111,
46801            b,
46802            c,
46803        );
46804        #[rustfmt::skip]
46805        let e = _mm512_setr_ps(
46806            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46807            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46808            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46809            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46810        );
46811        assert_eq_m512(r, e);
46812    }
46813
46814    #[simd_test(enable = "avx512f")]
46815    unsafe fn test_mm512_maskz_fmadd_round_ps() {
46816        let a = _mm512_set1_ps(0.00000007);
46817        let b = _mm512_set1_ps(1.);
46818        let c = _mm512_set1_ps(-1.);
46819        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46820            0, a, b, c,
46821        );
46822        assert_eq_m512(r, _mm512_setzero_ps());
46824        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46825            0b00000000_11111111,
46826            a,
46827            b,
46828            c,
46829        );
46830        #[rustfmt::skip]
46831        let e = _mm512_setr_ps(
46832            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46833            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46834            0., 0., 0., 0.,
46835            0., 0., 0., 0.,
46836        );
46837        assert_eq_m512(r, e);
46838    }
46839
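    // The `mask3` variants take the writemask last and, where a mask bit is
    // clear, copy the lane from `c` rather than from `a`.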
46840    #[simd_test(enable = "avx512f")]
46841    unsafe fn test_mm512_mask3_fmadd_round_ps() {
46842        let a = _mm512_set1_ps(0.00000007);
46843        let b = _mm512_set1_ps(1.);
46844        let c = _mm512_set1_ps(-1.);
46845        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46846            a, b, c, 0,
46847        );
46848        assert_eq_m512(r, c);
46849        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46850            a,
46851            b,
46852            c,
46853            0b00000000_11111111,
46854        );
46855        #[rustfmt::skip]
46856        let e = _mm512_setr_ps(
46857            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46858            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46859            -1., -1., -1., -1.,
46860            -1., -1., -1., -1.,
46861        );
46862        assert_eq_m512(r, e);
46863    }
46864
46865    #[simd_test(enable = "avx512f")]
46866    unsafe fn test_mm512_fmsub_round_ps() {
46867        let a = _mm512_set1_ps(0.00000007);
46868        let b = _mm512_set1_ps(1.);
46869        let c = _mm512_set1_ps(1.);
46870        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46871        let e = _mm512_set1_ps(-0.99999994);
46872        assert_eq_m512(r, e);
46873        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46874        let e = _mm512_set1_ps(-0.9999999);
46875        assert_eq_m512(r, e);
46876    }
46877
46878    #[simd_test(enable = "avx512f")]
46879    unsafe fn test_mm512_mask_fmsub_round_ps() {
46880        let a = _mm512_set1_ps(0.00000007);
46881        let b = _mm512_set1_ps(1.);
46882        let c = _mm512_set1_ps(1.);
46883        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46884            a, 0, b, c,
46885        );
46886        assert_eq_m512(r, a);
46887        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46888            a,
46889            0b00000000_11111111,
46890            b,
46891            c,
46892        );
46893        #[rustfmt::skip]
46894        let e = _mm512_setr_ps(
46895            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46896            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46897            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46898            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46899        );
46900        assert_eq_m512(r, e);
46901    }
46902
46903    #[simd_test(enable = "avx512f")]
46904    unsafe fn test_mm512_maskz_fmsub_round_ps() {
46905        let a = _mm512_set1_ps(0.00000007);
46906        let b = _mm512_set1_ps(1.);
46907        let c = _mm512_set1_ps(1.);
46908        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46909            0, a, b, c,
46910        );
46911        assert_eq_m512(r, _mm512_setzero_ps());
46912        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46913            0b00000000_11111111,
46914            a,
46915            b,
46916            c,
46917        );
46918        #[rustfmt::skip]
46919        let e = _mm512_setr_ps(
46920            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46921            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46922            0., 0., 0., 0.,
46923            0., 0., 0., 0.,
46924        );
46925        assert_eq_m512(r, e);
46926    }
46927
46928    #[simd_test(enable = "avx512f")]
46929    unsafe fn test_mm512_mask3_fmsub_round_ps() {
46930        let a = _mm512_set1_ps(0.00000007);
46931        let b = _mm512_set1_ps(1.);
46932        let c = _mm512_set1_ps(1.);
46933        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46934            a, b, c, 0,
46935        );
46936        assert_eq_m512(r, c);
46937        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46938            a,
46939            b,
46940            c,
46941            0b00000000_11111111,
46942        );
46943        #[rustfmt::skip]
46944        let e = _mm512_setr_ps(
46945            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46946            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46947            1., 1., 1., 1.,
46948            1., 1., 1., 1.,
46949        );
46950        assert_eq_m512(r, e);
46951    }
46952
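    // fmaddsub alternates per lane: even lanes compute a * b - c, odd lanes
    // a * b + c. With c = -1 that produces the 1.0000001 / -0.99999994 pairs.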
46953    #[simd_test(enable = "avx512f")]
46954    unsafe fn test_mm512_fmaddsub_round_ps() {
46955        let a = _mm512_set1_ps(0.00000007);
46956        let b = _mm512_set1_ps(1.);
46957        let c = _mm512_set1_ps(-1.);
46958        let r =
46959            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46960        #[rustfmt::skip]
46961        let e = _mm512_setr_ps(
46962            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46963            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46964            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46965            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46966        );
46967        assert_eq_m512(r, e);
46968        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46969        let e = _mm512_setr_ps(
46970            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
46971            -0.9999999, 1., -0.9999999, 1., -0.9999999,
46972        );
46973        assert_eq_m512(r, e);
46974    }
46975
46976    #[simd_test(enable = "avx512f")]
46977    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
46978        let a = _mm512_set1_ps(0.00000007);
46979        let b = _mm512_set1_ps(1.);
46980        let c = _mm512_set1_ps(-1.);
46981        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46982            a, 0, b, c,
46983        );
46984        assert_eq_m512(r, a);
46985        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46986            a,
46987            0b00000000_11111111,
46988            b,
46989            c,
46990        );
46991        #[rustfmt::skip]
46992        let e = _mm512_setr_ps(
46993            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46994            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46995            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46996            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46997        );
46998        assert_eq_m512(r, e);
46999    }
47000
47001    #[simd_test(enable = "avx512f")]
47002    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47003        let a = _mm512_set1_ps(0.00000007);
47004        let b = _mm512_set1_ps(1.);
47005        let c = _mm512_set1_ps(-1.);
47006        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47007            0, a, b, c,
47008        );
47009        assert_eq_m512(r, _mm512_setzero_ps());
47010        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47011            0b00000000_11111111,
47012            a,
47013            b,
47014            c,
47015        );
47016        #[rustfmt::skip]
47017        let e = _mm512_setr_ps(
47018            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47019            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47020            0., 0., 0., 0.,
47021            0., 0., 0., 0.,
47022        );
47023        assert_eq_m512(r, e);
47024    }
47025
47026    #[simd_test(enable = "avx512f")]
47027    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47028        let a = _mm512_set1_ps(0.00000007);
47029        let b = _mm512_set1_ps(1.);
47030        let c = _mm512_set1_ps(-1.);
47031        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47032            a, b, c, 0,
47033        );
47034        assert_eq_m512(r, c);
47035        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47036            a,
47037            b,
47038            c,
47039            0b00000000_11111111,
47040        );
47041        #[rustfmt::skip]
47042        let e = _mm512_setr_ps(
47043            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47044            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47045            -1., -1., -1., -1.,
47046            -1., -1., -1., -1.,
47047        );
47048        assert_eq_m512(r, e);
47049    }
47050
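    // fmsubadd mirrors fmaddsub: even lanes compute a * b + c, odd lanes
    // a * b - c.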
47051    #[simd_test(enable = "avx512f")]
47052    unsafe fn test_mm512_fmsubadd_round_ps() {
47053        let a = _mm512_set1_ps(0.00000007);
47054        let b = _mm512_set1_ps(1.);
47055        let c = _mm512_set1_ps(-1.);
47056        let r =
47057            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47058        #[rustfmt::skip]
47059        let e = _mm512_setr_ps(
47060            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47061            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47062            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47063            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47064        );
47065        assert_eq_m512(r, e);
47066        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47067        let e = _mm512_setr_ps(
47068            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47069            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47070        );
47071        assert_eq_m512(r, e);
47072    }
47073
47074    #[simd_test(enable = "avx512f")]
47075    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47076        let a = _mm512_set1_ps(0.00000007);
47077        let b = _mm512_set1_ps(1.);
47078        let c = _mm512_set1_ps(-1.);
47079        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47080            a, 0, b, c,
47081        );
47082        assert_eq_m512(r, a);
47083        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47084            a,
47085            0b00000000_11111111,
47086            b,
47087            c,
47088        );
47089        #[rustfmt::skip]
47090        let e = _mm512_setr_ps(
47091            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47092            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47093            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47094            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47095        );
47096        assert_eq_m512(r, e);
47097    }
47098
47099    #[simd_test(enable = "avx512f")]
47100    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47101        let a = _mm512_set1_ps(0.00000007);
47102        let b = _mm512_set1_ps(1.);
47103        let c = _mm512_set1_ps(-1.);
47104        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47105            0, a, b, c,
47106        );
47107        assert_eq_m512(r, _mm512_setzero_ps());
47108        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47109            0b00000000_11111111,
47110            a,
47111            b,
47112            c,
47113        );
47114        #[rustfmt::skip]
47115        let e = _mm512_setr_ps(
47116            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47117            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47118            0., 0., 0., 0.,
47119            0., 0., 0., 0.,
47120        );
47121        assert_eq_m512(r, e);
47122    }
47123
47124    #[simd_test(enable = "avx512f")]
47125    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47126        let a = _mm512_set1_ps(0.00000007);
47127        let b = _mm512_set1_ps(1.);
47128        let c = _mm512_set1_ps(-1.);
47129        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47130            a, b, c, 0,
47131        );
47132        assert_eq_m512(r, c);
47133        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47134            a,
47135            b,
47136            c,
47137            0b00000000_11111111,
47138        );
47139        #[rustfmt::skip]
47140        let e = _mm512_setr_ps(
47141            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47142            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47143            -1., -1., -1., -1.,
47144            -1., -1., -1., -1.,
47145        );
47146        assert_eq_m512(r, e);
47147    }
47148
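    // fnmadd computes -(a * b) + c: 1.0 - 0.00000007 rounds to 0.99999994
    // under round-to-nearest and truncates to 0.9999999 toward zero.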
47149    #[simd_test(enable = "avx512f")]
47150    unsafe fn test_mm512_fnmadd_round_ps() {
47151        let a = _mm512_set1_ps(0.00000007);
47152        let b = _mm512_set1_ps(1.);
47153        let c = _mm512_set1_ps(1.);
47154        let r =
47155            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47156        let e = _mm512_set1_ps(0.99999994);
47157        assert_eq_m512(r, e);
47158        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47159        let e = _mm512_set1_ps(0.9999999);
47160        assert_eq_m512(r, e);
47161    }
47162
47163    #[simd_test(enable = "avx512f")]
47164    unsafe fn test_mm512_mask_fnmadd_round_ps() {
47165        let a = _mm512_set1_ps(0.00000007);
47166        let b = _mm512_set1_ps(1.);
47167        let c = _mm512_set1_ps(1.);
47168        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47169            a, 0, b, c,
47170        );
47171        assert_eq_m512(r, a);
47172        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47173            a,
47174            0b00000000_11111111,
47175            b,
47176            c,
47177        );
47178        let e = _mm512_setr_ps(
47179            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47180            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47181            0.00000007, 0.00000007,
47182        );
47183        assert_eq_m512(r, e);
47184    }
47185
47186    #[simd_test(enable = "avx512f")]
47187    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47188        let a = _mm512_set1_ps(0.00000007);
47189        let b = _mm512_set1_ps(1.);
47190        let c = _mm512_set1_ps(1.);
47191        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47192            0, a, b, c,
47193        );
47194        assert_eq_m512(r, _mm512_setzero_ps());
47195        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47196            0b00000000_11111111,
47197            a,
47198            b,
47199            c,
47200        );
47201        let e = _mm512_setr_ps(
47202            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47203            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47204        );
47205        assert_eq_m512(r, e);
47206    }
47207
47208    #[simd_test(enable = "avx512f")]
47209    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47210        let a = _mm512_set1_ps(0.00000007);
47211        let b = _mm512_set1_ps(1.);
47212        let c = _mm512_set1_ps(1.);
47213        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47214            a, b, c, 0,
47215        );
47216        assert_eq_m512(r, c);
47217        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47218            a,
47219            b,
47220            c,
47221            0b00000000_11111111,
47222        );
47223        let e = _mm512_setr_ps(
47224            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47225            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47226        );
47227        assert_eq_m512(r, e);
47228    }
47229
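    // fnmsub computes -(a * b) - c; with c = -1 the results match fnmadd above.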
47230    #[simd_test(enable = "avx512f")]
47231    unsafe fn test_mm512_fnmsub_round_ps() {
47232        let a = _mm512_set1_ps(0.00000007);
47233        let b = _mm512_set1_ps(1.);
47234        let c = _mm512_set1_ps(-1.);
47235        let r =
47236            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47237        let e = _mm512_set1_ps(0.99999994);
47238        assert_eq_m512(r, e);
47239        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47240        let e = _mm512_set1_ps(0.9999999);
47241        assert_eq_m512(r, e);
47242    }
47243
47244    #[simd_test(enable = "avx512f")]
47245    unsafe fn test_mm512_mask_fnmsub_round_ps() {
47246        let a = _mm512_set1_ps(0.00000007);
47247        let b = _mm512_set1_ps(1.);
47248        let c = _mm512_set1_ps(-1.);
47249        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47250            a, 0, b, c,
47251        );
47252        assert_eq_m512(r, a);
47253        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47254            a,
47255            0b00000000_11111111,
47256            b,
47257            c,
47258        );
47259        let e = _mm512_setr_ps(
47260            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47261            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47262            0.00000007, 0.00000007,
47263        );
47264        assert_eq_m512(r, e);
47265    }
47266
47267    #[simd_test(enable = "avx512f")]
47268    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47269        let a = _mm512_set1_ps(0.00000007);
47270        let b = _mm512_set1_ps(1.);
47271        let c = _mm512_set1_ps(-1.);
47272        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47273            0, a, b, c,
47274        );
47275        assert_eq_m512(r, _mm512_setzero_ps());
47276        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47277            0b00000000_11111111,
47278            a,
47279            b,
47280            c,
47281        );
47282        let e = _mm512_setr_ps(
47283            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47284            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47285        );
47286        assert_eq_m512(r, e);
47287    }
47288
47289    #[simd_test(enable = "avx512f")]
47290    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47291        let a = _mm512_set1_ps(0.00000007);
47292        let b = _mm512_set1_ps(1.);
47293        let c = _mm512_set1_ps(-1.);
47294        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47295            a, b, c, 0,
47296        );
47297        assert_eq_m512(r, c);
47298        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47299            a,
47300            b,
47301            c,
47302            0b00000000_11111111,
47303        );
47304        let e = _mm512_setr_ps(
47305            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47306            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47307        );
47308        assert_eq_m512(r, e);
47309    }
47310
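    // max/min results are exact, so the const argument only selects exception
    // behavior (SAE): _MM_FROUND_CUR_DIRECTION reports exceptions as usual.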
47311    #[simd_test(enable = "avx512f")]
47312    unsafe fn test_mm512_max_round_ps() {
47313        let a = _mm512_setr_ps(
47314            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47315        );
47316        let b = _mm512_setr_ps(
47317            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47318        );
47319        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47320        let e = _mm512_setr_ps(
47321            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47322        );
47323        assert_eq_m512(r, e);
47324    }
47325
47326    #[simd_test(enable = "avx512f")]
47327    unsafe fn test_mm512_mask_max_round_ps() {
47328        let a = _mm512_setr_ps(
47329            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47330        );
47331        let b = _mm512_setr_ps(
47332            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47333        );
47334        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47335        assert_eq_m512(r, a);
47336        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47337        let e = _mm512_setr_ps(
47338            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47339        );
47340        assert_eq_m512(r, e);
47341    }
47342
47343    #[simd_test(enable = "avx512f")]
47344    unsafe fn test_mm512_maskz_max_round_ps() {
47345        let a = _mm512_setr_ps(
47346            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47347        );
47348        let b = _mm512_setr_ps(
47349            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47350        );
47351        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47352        assert_eq_m512(r, _mm512_setzero_ps());
47353        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47354        let e = _mm512_setr_ps(
47355            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47356        );
47357        assert_eq_m512(r, e);
47358    }
47359
47360    #[simd_test(enable = "avx512f")]
47361    unsafe fn test_mm512_min_round_ps() {
47362        let a = _mm512_setr_ps(
47363            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47364        );
47365        let b = _mm512_setr_ps(
47366            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47367        );
47368        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47369        let e = _mm512_setr_ps(
47370            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47371        );
47372        assert_eq_m512(r, e);
47373    }
47374
47375    #[simd_test(enable = "avx512f")]
47376    unsafe fn test_mm512_mask_min_round_ps() {
47377        let a = _mm512_setr_ps(
47378            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47379        );
47380        let b = _mm512_setr_ps(
47381            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47382        );
47383        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47384        assert_eq_m512(r, a);
47385        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47386        let e = _mm512_setr_ps(
47387            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47388        );
47389        assert_eq_m512(r, e);
47390    }
47391
47392    #[simd_test(enable = "avx512f")]
47393    unsafe fn test_mm512_maskz_min_round_ps() {
47394        let a = _mm512_setr_ps(
47395            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47396        );
47397        let b = _mm512_setr_ps(
47398            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47399        );
47400        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47401        assert_eq_m512(r, _mm512_setzero_ps());
47402        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47403        let e = _mm512_setr_ps(
47404            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47405        );
47406        assert_eq_m512(r, e);
47407    }
47408
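    // getexp returns floor(log2(|x|)) as a float; for x = 3.0 that is 1.0.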
47409    #[simd_test(enable = "avx512f")]
47410    unsafe fn test_mm512_getexp_round_ps() {
47411        let a = _mm512_set1_ps(3.);
47412        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47413        let e = _mm512_set1_ps(1.);
47414        assert_eq_m512(r, e);
47415    }
47416
47417    #[simd_test(enable = "avx512f")]
47418    unsafe fn test_mm512_mask_getexp_round_ps() {
47419        let a = _mm512_set1_ps(3.);
47420        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47421        assert_eq_m512(r, a);
47422        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47423        let e = _mm512_setr_ps(
47424            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47425        );
47426        assert_eq_m512(r, e);
47427    }
47428
47429    #[simd_test(enable = "avx512f")]
47430    unsafe fn test_mm512_maskz_getexp_round_ps() {
47431        let a = _mm512_set1_ps(3.);
47432        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47433        assert_eq_m512(r, _mm512_setzero_ps());
47434        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47435        let e = _mm512_setr_ps(
47436            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47437        );
47438        assert_eq_m512(r, e);
47439    }
47440
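    // IMM8 = 0 keeps zero fraction bits and selects round-to-nearest-even, so
    // 1.1 becomes 1.0.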
47441    #[simd_test(enable = "avx512f")]
47442    unsafe fn test_mm512_roundscale_round_ps() {
47443        let a = _mm512_set1_ps(1.1);
47444        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47445        let e = _mm512_set1_ps(1.0);
47446        assert_eq_m512(r, e);
47447    }
47448
47449    #[simd_test(enable = "avx512f")]
47450    unsafe fn test_mm512_mask_roundscale_round_ps() {
47451        let a = _mm512_set1_ps(1.1);
47452        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47453        let e = _mm512_set1_ps(1.1);
47454        assert_eq_m512(r, e);
47455        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47456            a,
47457            0b11111111_11111111,
47458            a,
47459        );
47460        let e = _mm512_set1_ps(1.0);
47461        assert_eq_m512(r, e);
47462    }
47463
47464    #[simd_test(enable = "avx512f")]
47465    unsafe fn test_mm512_maskz_roundscale_round_ps() {
47466        let a = _mm512_set1_ps(1.1);
47467        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47468        assert_eq_m512(r, _mm512_setzero_ps());
47469        let r =
47470            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47471        let e = _mm512_set1_ps(1.0);
47472        assert_eq_m512(r, e);
47473    }
47474
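    // scalef computes a * 2^floor(b): 1.0 * 2^3 = 8.0. The expected vectors
    // below use `_mm512_set_ps`, which lists lane 15 first, so the leading 8.0s
    // correspond to the high mask bits 8..=15.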
47475    #[simd_test(enable = "avx512f")]
47476    unsafe fn test_mm512_scalef_round_ps() {
47477        let a = _mm512_set1_ps(1.);
47478        let b = _mm512_set1_ps(3.);
47479        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47480        let e = _mm512_set1_ps(8.);
47481        assert_eq_m512(r, e);
47482    }
47483
47484    #[simd_test(enable = "avx512f")]
47485    unsafe fn test_mm512_mask_scalef_round_ps() {
47486        let a = _mm512_set1_ps(1.);
47487        let b = _mm512_set1_ps(3.);
47488        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47489            a, 0, a, b,
47490        );
47491        assert_eq_m512(r, a);
47492        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47493            a,
47494            0b11111111_00000000,
47495            a,
47496            b,
47497        );
47498        let e = _mm512_set_ps(
47499            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47500        );
47501        assert_eq_m512(r, e);
47502    }
47503
47504    #[simd_test(enable = "avx512f")]
47505    unsafe fn test_mm512_maskz_scalef_round_ps() {
47506        let a = _mm512_set1_ps(1.);
47507        let b = _mm512_set1_ps(3.);
47508        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47509            0, a, b,
47510        );
47511        assert_eq_m512(r, _mm512_setzero_ps());
47512        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47513            0b11111111_00000000,
47514            a,
47515            b,
47516        );
47517        let e = _mm512_set_ps(
47518            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47519        );
47520        assert_eq_m512(r, e);
47521    }
47522
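    // With i32::MAX in every table element the NaN inputs are fixed up to +0.0;
    // see Intel's vfixupimmps token encoding for how the nibbles are selected.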
47523    #[simd_test(enable = "avx512f")]
47524    unsafe fn test_mm512_fixupimm_round_ps() {
47525        let a = _mm512_set1_ps(f32::NAN);
47526        let b = _mm512_set1_ps(f32::MAX);
47527        let c = _mm512_set1_epi32(i32::MAX);
47528        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47529        let e = _mm512_set1_ps(0.0);
47530        assert_eq_m512(r, e);
47531    }
47532
47533    #[simd_test(enable = "avx512f")]
47534    unsafe fn test_mm512_mask_fixupimm_round_ps() {
47535        #[rustfmt::skip]
47536        let a = _mm512_set_ps(
47537            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47538            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47539            1., 1., 1., 1.,
47540            1., 1., 1., 1.,
47541        );
47542        let b = _mm512_set1_ps(f32::MAX);
47543        let c = _mm512_set1_epi32(i32::MAX);
47544        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47545            a,
47546            0b11111111_00000000,
47547            b,
47548            c,
47549        );
47550        let e = _mm512_set_ps(
47551            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47552        );
47553        assert_eq_m512(r, e);
47554    }
47555
47556    #[simd_test(enable = "avx512f")]
47557    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47558        #[rustfmt::skip]
47559        let a = _mm512_set_ps(
47560            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47561            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47562            1., 1., 1., 1.,
47563            1., 1., 1., 1.,
47564        );
47565        let b = _mm512_set1_ps(f32::MAX);
47566        let c = _mm512_set1_epi32(i32::MAX);
47567        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47568            0b11111111_00000000,
47569            a,
47570            b,
47571            c,
47572        );
47573        let e = _mm512_set_ps(
47574            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47575        );
47576        assert_eq_m512(r, e);
47577    }
47578
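    // 10.0 = 1.25 * 2^3, so _MM_MANT_NORM_1_2 extracts the mantissa 1.25;
    // _MM_MANT_SIGN_SRC keeps the (here positive) sign of the source.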
47579    #[simd_test(enable = "avx512f")]
47580    unsafe fn test_mm512_getmant_round_ps() {
47581        let a = _mm512_set1_ps(10.);
47582        let r = _mm512_getmant_round_ps::<
47583            _MM_MANT_NORM_1_2,
47584            _MM_MANT_SIGN_SRC,
47585            _MM_FROUND_CUR_DIRECTION,
47586        >(a);
47587        let e = _mm512_set1_ps(1.25);
47588        assert_eq_m512(r, e);
47589    }
47590
47591    #[simd_test(enable = "avx512f")]
47592    unsafe fn test_mm512_mask_getmant_round_ps() {
47593        let a = _mm512_set1_ps(10.);
47594        let r = _mm512_mask_getmant_round_ps::<
47595            _MM_MANT_NORM_1_2,
47596            _MM_MANT_SIGN_SRC,
47597            _MM_FROUND_CUR_DIRECTION,
47598        >(a, 0, a);
47599        assert_eq_m512(r, a);
47600        let r = _mm512_mask_getmant_round_ps::<
47601            _MM_MANT_NORM_1_2,
47602            _MM_MANT_SIGN_SRC,
47603            _MM_FROUND_CUR_DIRECTION,
47604        >(a, 0b11111111_00000000, a);
47605        let e = _mm512_setr_ps(
47606            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47607        );
47608        assert_eq_m512(r, e);
47609    }
47610
47611    #[simd_test(enable = "avx512f")]
47612    unsafe fn test_mm512_maskz_getmant_round_ps() {
47613        let a = _mm512_set1_ps(10.);
47614        let r = _mm512_maskz_getmant_round_ps::<
47615            _MM_MANT_NORM_1_2,
47616            _MM_MANT_SIGN_SRC,
47617            _MM_FROUND_CUR_DIRECTION,
47618        >(0, a);
47619        assert_eq_m512(r, _mm512_setzero_ps());
47620        let r = _mm512_maskz_getmant_round_ps::<
47621            _MM_MANT_NORM_1_2,
47622            _MM_MANT_SIGN_SRC,
47623            _MM_FROUND_CUR_DIRECTION,
47624        >(0b11111111_00000000, a);
47625        let e = _mm512_setr_ps(
47626            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47627        );
47628        assert_eq_m512(r, e);
47629    }
47630
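    // The default MXCSR mode is round-to-nearest-even, so exact halves round to
    // the even neighbor: -3.5 -> -4, 9.5 -> 10, 13.5 -> 14.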
47631    #[simd_test(enable = "avx512f")]
47632    unsafe fn test_mm512_cvtps_epi32() {
47633        let a = _mm512_setr_ps(
47634            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47635        );
47636        let r = _mm512_cvtps_epi32(a);
47637        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47638        assert_eq_m512i(r, e);
47639    }
47640
47641    #[simd_test(enable = "avx512f")]
47642    unsafe fn test_mm512_mask_cvtps_epi32() {
47643        let a = _mm512_setr_ps(
47644            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47645        );
47646        let src = _mm512_set1_epi32(0);
47647        let r = _mm512_mask_cvtps_epi32(src, 0, a);
47648        assert_eq_m512i(r, src);
47649        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47650        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47651        assert_eq_m512i(r, e);
47652    }
47653
47654    #[simd_test(enable = "avx512f")]
47655    unsafe fn test_mm512_maskz_cvtps_epi32() {
47656        let a = _mm512_setr_ps(
47657            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47658        );
47659        let r = _mm512_maskz_cvtps_epi32(0, a);
47660        assert_eq_m512i(r, _mm512_setzero_si512());
47661        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47662        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47663        assert_eq_m512i(r, e);
47664    }
47665
47666    #[simd_test(enable = "avx512f,avx512vl")]
47667    unsafe fn test_mm256_mask_cvtps_epi32() {
47668        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47669        let src = _mm256_set1_epi32(0);
47670        let r = _mm256_mask_cvtps_epi32(src, 0, a);
47671        assert_eq_m256i(r, src);
47672        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47673        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47674        assert_eq_m256i(r, e);
47675    }
47676
47677    #[simd_test(enable = "avx512f,avx512vl")]
47678    unsafe fn test_mm256_maskz_cvtps_epi32() {
47679        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47680        let r = _mm256_maskz_cvtps_epi32(0, a);
47681        assert_eq_m256i(r, _mm256_setzero_si256());
47682        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47683        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47684        assert_eq_m256i(r, e);
47685    }
47686
47687    #[simd_test(enable = "avx512f,avx512vl")]
47688    unsafe fn test_mm_mask_cvtps_epi32() {
47689        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47690        let src = _mm_set1_epi32(0);
47691        let r = _mm_mask_cvtps_epi32(src, 0, a);
47692        assert_eq_m128i(r, src);
47693        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47694        let e = _mm_set_epi32(12, 14, 14, 16);
47695        assert_eq_m128i(r, e);
47696    }
47697
47698    #[simd_test(enable = "avx512f,avx512vl")]
47699    unsafe fn test_mm_maskz_cvtps_epi32() {
47700        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47701        let r = _mm_maskz_cvtps_epi32(0, a);
47702        assert_eq_m128i(r, _mm_setzero_si128());
47703        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47704        let e = _mm_set_epi32(12, 14, 14, 16);
47705        assert_eq_m128i(r, e);
47706    }
47707
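    // Negative inputs are out of range for an unsigned conversion; vcvtps2udq
    // returns 0xFFFF_FFFF for them, which reads back as -1 through the signed
    // epi32 constructors.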
47708    #[simd_test(enable = "avx512f")]
47709    unsafe fn test_mm512_cvtps_epu32() {
47710        let a = _mm512_setr_ps(
47711            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47712        );
47713        let r = _mm512_cvtps_epu32(a);
47714        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47715        assert_eq_m512i(r, e);
47716    }
47717
47718    #[simd_test(enable = "avx512f")]
47719    unsafe fn test_mm512_mask_cvtps_epu32() {
47720        let a = _mm512_setr_ps(
47721            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47722        );
47723        let src = _mm512_set1_epi32(0);
47724        let r = _mm512_mask_cvtps_epu32(src, 0, a);
47725        assert_eq_m512i(r, src);
47726        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47727        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47728        assert_eq_m512i(r, e);
47729    }
47730
47731    #[simd_test(enable = "avx512f")]
47732    unsafe fn test_mm512_maskz_cvtps_epu32() {
47733        let a = _mm512_setr_ps(
47734            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47735        );
47736        let r = _mm512_maskz_cvtps_epu32(0, a);
47737        assert_eq_m512i(r, _mm512_setzero_si512());
47738        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47739        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47740        assert_eq_m512i(r, e);
47741    }
47742
47743    #[simd_test(enable = "avx512f,avx512vl")]
47744    unsafe fn test_mm256_cvtps_epu32() {
47745        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47746        let r = _mm256_cvtps_epu32(a);
47747        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47748        assert_eq_m256i(r, e);
47749    }
47750
47751    #[simd_test(enable = "avx512f,avx512vl")]
47752    unsafe fn test_mm256_mask_cvtps_epu32() {
47753        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47754        let src = _mm256_set1_epi32(0);
47755        let r = _mm256_mask_cvtps_epu32(src, 0, a);
47756        assert_eq_m256i(r, src);
47757        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47758        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47759        assert_eq_m256i(r, e);
47760    }
47761
47762    #[simd_test(enable = "avx512f,avx512vl")]
47763    unsafe fn test_mm256_maskz_cvtps_epu32() {
47764        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47765        let r = _mm256_maskz_cvtps_epu32(0, a);
47766        assert_eq_m256i(r, _mm256_setzero_si256());
47767        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47768        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47769        assert_eq_m256i(r, e);
47770    }
47771
47772    #[simd_test(enable = "avx512f,avx512vl")]
47773    unsafe fn test_mm_cvtps_epu32() {
47774        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47775        let r = _mm_cvtps_epu32(a);
47776        let e = _mm_set_epi32(12, 14, 14, 16);
47777        assert_eq_m128i(r, e);
47778    }
47779
47780    #[simd_test(enable = "avx512f,avx512vl")]
47781    unsafe fn test_mm_mask_cvtps_epu32() {
47782        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47783        let src = _mm_set1_epi32(0);
47784        let r = _mm_mask_cvtps_epu32(src, 0, a);
47785        assert_eq_m128i(r, src);
47786        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47787        let e = _mm_set_epi32(12, 14, 14, 16);
47788        assert_eq_m128i(r, e);
47789    }
47790
47791    #[simd_test(enable = "avx512f,avx512vl")]
47792    unsafe fn test_mm_maskz_cvtps_epu32() {
47793        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47794        let r = _mm_maskz_cvtps_epu32(0, a);
47795        assert_eq_m128i(r, _mm_setzero_si128());
47796        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47797        let e = _mm_set_epi32(12, 14, 14, 16);
47798        assert_eq_m128i(r, e);
47799    }
47800
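    // Sign-extension (epi8) and zero-extension (epu8, further below) coincide
    // for these non-negative bytes, so both families expect the same values.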
47801    #[simd_test(enable = "avx512f")]
47802    unsafe fn test_mm512_cvtepi8_epi32() {
47803        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47804        let r = _mm512_cvtepi8_epi32(a);
47805        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47806        assert_eq_m512i(r, e);
47807    }
47808
47809    #[simd_test(enable = "avx512f")]
47810    unsafe fn test_mm512_mask_cvtepi8_epi32() {
47811        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47812        let src = _mm512_set1_epi32(-1);
47813        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47814        assert_eq_m512i(r, src);
47815        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47816        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47817        assert_eq_m512i(r, e);
47818    }
47819
47820    #[simd_test(enable = "avx512f")]
47821    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47822        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47823        let r = _mm512_maskz_cvtepi8_epi32(0, a);
47824        assert_eq_m512i(r, _mm512_setzero_si512());
47825        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47826        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47827        assert_eq_m512i(r, e);
47828    }
47829
47830    #[simd_test(enable = "avx512f,avx512vl")]
47831    unsafe fn test_mm256_mask_cvtepi8_epi32() {
47832        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47833        let src = _mm256_set1_epi32(-1);
47834        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47835        assert_eq_m256i(r, src);
47836        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47837        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47838        assert_eq_m256i(r, e);
47839    }
47840
47841    #[simd_test(enable = "avx512f,avx512vl")]
47842    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47843        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47844        let r = _mm256_maskz_cvtepi8_epi32(0, a);
47845        assert_eq_m256i(r, _mm256_setzero_si256());
47846        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47847        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47848        assert_eq_m256i(r, e);
47849    }
47850
47851    #[simd_test(enable = "avx512f,avx512vl")]
47852    unsafe fn test_mm_mask_cvtepi8_epi32() {
47853        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47854        let src = _mm_set1_epi32(-1);
47855        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47856        assert_eq_m128i(r, src);
47857        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47858        let e = _mm_set_epi32(12, 13, 14, 15);
47859        assert_eq_m128i(r, e);
47860    }
47861
47862    #[simd_test(enable = "avx512f,avx512vl")]
47863    unsafe fn test_mm_maskz_cvtepi8_epi32() {
47864        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47865        let r = _mm_maskz_cvtepi8_epi32(0, a);
47866        assert_eq_m128i(r, _mm_setzero_si128());
47867        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47868        let e = _mm_set_epi32(12, 13, 14, 15);
47869        assert_eq_m128i(r, e);
47870    }
47871
47872    #[simd_test(enable = "avx512f")]
47873    unsafe fn test_mm512_cvtepu8_epi32() {
47874        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47875        let r = _mm512_cvtepu8_epi32(a);
47876        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47877        assert_eq_m512i(r, e);
47878    }
47879
47880    #[simd_test(enable = "avx512f")]
47881    unsafe fn test_mm512_mask_cvtepu8_epi32() {
47882        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47883        let src = _mm512_set1_epi32(-1);
47884        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47885        assert_eq_m512i(r, src);
47886        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47887        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47888        assert_eq_m512i(r, e);
47889    }
47890
47891    #[simd_test(enable = "avx512f")]
47892    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47893        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47894        let r = _mm512_maskz_cvtepu8_epi32(0, a);
47895        assert_eq_m512i(r, _mm512_setzero_si512());
47896        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47897        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47898        assert_eq_m512i(r, e);
47899    }
47900
47901    #[simd_test(enable = "avx512f,avx512vl")]
47902    unsafe fn test_mm256_mask_cvtepu8_epi32() {
47903        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47904        let src = _mm256_set1_epi32(-1);
47905        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47906        assert_eq_m256i(r, src);
47907        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47908        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47909        assert_eq_m256i(r, e);
47910    }
47911
47912    #[simd_test(enable = "avx512f,avx512vl")]
47913    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47914        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47915        let r = _mm256_maskz_cvtepu8_epi32(0, a);
47916        assert_eq_m256i(r, _mm256_setzero_si256());
47917        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47918        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47919        assert_eq_m256i(r, e);
47920    }
47921
47922    #[simd_test(enable = "avx512f,avx512vl")]
47923    unsafe fn test_mm_mask_cvtepu8_epi32() {
47924        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47925        let src = _mm_set1_epi32(-1);
47926        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47927        assert_eq_m128i(r, src);
47928        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47929        let e = _mm_set_epi32(12, 13, 14, 15);
47930        assert_eq_m128i(r, e);
47931    }
47932
47933    #[simd_test(enable = "avx512f,avx512vl")]
47934    unsafe fn test_mm_maskz_cvtepu8_epi32() {
47935        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47936        let r = _mm_maskz_cvtepu8_epi32(0, a);
47937        assert_eq_m128i(r, _mm_setzero_si128());
47938        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47939        let e = _mm_set_epi32(12, 13, 14, 15);
47940        assert_eq_m128i(r, e);
47941    }
47942
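    // The 16-bit widenings follow the same pattern: sign- and zero-extension of
    // non-negative values produce identical 32-bit results.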
47943    #[simd_test(enable = "avx512f")]
47944    unsafe fn test_mm512_cvtepi16_epi32() {
47945        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47946        let r = _mm512_cvtepi16_epi32(a);
47947        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47948        assert_eq_m512i(r, e);
47949    }
47950
47951    #[simd_test(enable = "avx512f")]
47952    unsafe fn test_mm512_mask_cvtepi16_epi32() {
47953        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47954        let src = _mm512_set1_epi32(-1);
47955        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
47956        assert_eq_m512i(r, src);
47957        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
47958        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47959        assert_eq_m512i(r, e);
47960    }
47961
47962    #[simd_test(enable = "avx512f")]
47963    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
47964        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47965        let r = _mm512_maskz_cvtepi16_epi32(0, a);
47966        assert_eq_m512i(r, _mm512_setzero_si512());
47967        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
47968        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47969        assert_eq_m512i(r, e);
47970    }
47971
47972    #[simd_test(enable = "avx512f,avx512vl")]
47973    unsafe fn test_mm256_mask_cvtepi16_epi32() {
47974        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
47975        let src = _mm256_set1_epi32(-1);
47976        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
47977        assert_eq_m256i(r, src);
47978        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
47979        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
47980        assert_eq_m256i(r, e);
47981    }
47982
47983    #[simd_test(enable = "avx512f,avx512vl")]
47984    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
47985        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
47986        let r = _mm256_maskz_cvtepi16_epi32(0, a);
47987        assert_eq_m256i(r, _mm256_setzero_si256());
47988        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
47989        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
47990        assert_eq_m256i(r, e);
47991    }
47992
47993    #[simd_test(enable = "avx512f,avx512vl")]
47994    unsafe fn test_mm_mask_cvtepi16_epi32() {
47995        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
47996        let src = _mm_set1_epi32(-1);
47997        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
47998        assert_eq_m128i(r, src);
47999        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48000        let e = _mm_set_epi32(4, 5, 6, 7);
48001        assert_eq_m128i(r, e);
48002    }
48003
48004    #[simd_test(enable = "avx512f,avx512vl")]
48005    unsafe fn test_mm_maskz_cvtepi16_epi32() {
48006        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48007        let r = _mm_maskz_cvtepi16_epi32(0, a);
48008        assert_eq_m128i(r, _mm_setzero_si128());
48009        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48010        let e = _mm_set_epi32(4, 5, 6, 7);
48011        assert_eq_m128i(r, e);
48012    }
48013
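    // Zero-extending counterparts (vpmovzxwd): same shape as the tests above,
    // but each 16-bit element is zero-extended, so the results would only
    // differ from vpmovsxwd for inputs with the sign bit set (none here).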
48014    #[simd_test(enable = "avx512f")]
48015    unsafe fn test_mm512_cvtepu16_epi32() {
48016        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48017        let r = _mm512_cvtepu16_epi32(a);
48018        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48019        assert_eq_m512i(r, e);
48020    }
48021
48022    #[simd_test(enable = "avx512f")]
48023    unsafe fn test_mm512_mask_cvtepu16_epi32() {
48024        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48025        let src = _mm512_set1_epi32(-1);
48026        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48027        assert_eq_m512i(r, src);
48028        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48029        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48030        assert_eq_m512i(r, e);
48031    }
48032
48033    #[simd_test(enable = "avx512f")]
48034    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48035        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48036        let r = _mm512_maskz_cvtepu16_epi32(0, a);
48037        assert_eq_m512i(r, _mm512_setzero_si512());
48038        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48039        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48040        assert_eq_m512i(r, e);
48041    }
48042
48043    #[simd_test(enable = "avx512f,avx512vl")]
48044    unsafe fn test_mm256_mask_cvtepu16_epi32() {
48045        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48046        let src = _mm256_set1_epi32(-1);
48047        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48048        assert_eq_m256i(r, src);
48049        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48050        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48051        assert_eq_m256i(r, e);
48052    }
48053
48054    #[simd_test(enable = "avx512f,avx512vl")]
48055    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48056        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48057        let r = _mm256_maskz_cvtepu16_epi32(0, a);
48058        assert_eq_m256i(r, _mm256_setzero_si256());
48059        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48060        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48061        assert_eq_m256i(r, e);
48062    }
48063
48064    #[simd_test(enable = "avx512f,avx512vl")]
48065    unsafe fn test_mm_mask_cvtepu16_epi32() {
48066        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48067        let src = _mm_set1_epi32(-1);
48068        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48069        assert_eq_m128i(r, src);
48070        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48071        let e = _mm_set_epi32(12, 13, 14, 15);
48072        assert_eq_m128i(r, e);
48073    }
48074
48075    #[simd_test(enable = "avx512f,avx512vl")]
48076    unsafe fn test_mm_maskz_cvtepu16_epi32() {
48077        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48078        let r = _mm_maskz_cvtepu16_epi32(0, a);
48079        assert_eq_m128i(r, _mm_setzero_si128());
48080        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48081        let e = _mm_set_epi32(12, 13, 14, 15);
48082        assert_eq_m128i(r, e);
48083    }
48084
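    // Signed i32 -> f32 conversions (vcvtdq2ps). All inputs used here are far
    // below 2^24, so every conversion is exact and rounding never comes into
    // play.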
48085    #[simd_test(enable = "avx512f")]
48086    unsafe fn test_mm512_cvtepi32_ps() {
48087        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48088        let r = _mm512_cvtepi32_ps(a);
48089        let e = _mm512_set_ps(
48090            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48091        );
48092        assert_eq_m512(r, e);
48093    }
48094
48095    #[simd_test(enable = "avx512f")]
48096    unsafe fn test_mm512_mask_cvtepi32_ps() {
48097        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48098        let src = _mm512_set1_ps(-1.);
48099        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48100        assert_eq_m512(r, src);
48101        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48102        let e = _mm512_set_ps(
48103            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48104        );
48105        assert_eq_m512(r, e);
48106    }
48107
48108    #[simd_test(enable = "avx512f")]
48109    unsafe fn test_mm512_maskz_cvtepi32_ps() {
48110        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48111        let r = _mm512_maskz_cvtepi32_ps(0, a);
48112        assert_eq_m512(r, _mm512_setzero_ps());
48113        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48114        let e = _mm512_set_ps(
48115            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48116        );
48117        assert_eq_m512(r, e);
48118    }
48119
48120    #[simd_test(enable = "avx512f,avx512vl")]
48121    unsafe fn test_mm256_mask_cvtepi32_ps() {
48122        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48123        let src = _mm256_set1_ps(-1.);
48124        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48125        assert_eq_m256(r, src);
48126        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48127        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48128        assert_eq_m256(r, e);
48129    }
48130
48131    #[simd_test(enable = "avx512f,avx512vl")]
48132    unsafe fn test_mm256_maskz_cvtepi32_ps() {
48133        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48134        let r = _mm256_maskz_cvtepi32_ps(0, a);
48135        assert_eq_m256(r, _mm256_setzero_ps());
48136        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48137        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48138        assert_eq_m256(r, e);
48139    }
48140
48141    #[simd_test(enable = "avx512f,avx512vl")]
48142    unsafe fn test_mm_mask_cvtepi32_ps() {
48143        let a = _mm_set_epi32(1, 2, 3, 4);
48144        let src = _mm_set1_ps(-1.);
48145        let r = _mm_mask_cvtepi32_ps(src, 0, a);
48146        assert_eq_m128(r, src);
48147        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48148        let e = _mm_set_ps(1., 2., 3., 4.);
48149        assert_eq_m128(r, e);
48150    }
48151
48152    #[simd_test(enable = "avx512f,avx512vl")]
48153    unsafe fn test_mm_maskz_cvtepi32_ps() {
48154        let a = _mm_set_epi32(1, 2, 3, 4);
48155        let r = _mm_maskz_cvtepi32_ps(0, a);
48156        assert_eq_m128(r, _mm_setzero_ps());
48157        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48158        let e = _mm_set_ps(1., 2., 3., 4.);
48159        assert_eq_m128(r, e);
48160    }
48161
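    // Unsigned u32 -> f32 conversions (vcvtudq2ps); the inputs are small
    // non-negative values, so the signed and unsigned interpretations agree.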
48162    #[simd_test(enable = "avx512f")]
48163    unsafe fn test_mm512_cvtepu32_ps() {
48164        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48165        let r = _mm512_cvtepu32_ps(a);
48166        let e = _mm512_set_ps(
48167            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48168        );
48169        assert_eq_m512(r, e);
48170    }
48171
48172    #[simd_test(enable = "avx512f")]
48173    unsafe fn test_mm512_mask_cvtepu32_ps() {
48174        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48175        let src = _mm512_set1_ps(-1.);
48176        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48177        assert_eq_m512(r, src);
48178        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48179        let e = _mm512_set_ps(
48180            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48181        );
48182        assert_eq_m512(r, e);
48183    }
48184
48185    #[simd_test(enable = "avx512f")]
48186    unsafe fn test_mm512_maskz_cvtepu32_ps() {
48187        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48188        let r = _mm512_maskz_cvtepu32_ps(0, a);
48189        assert_eq_m512(r, _mm512_setzero_ps());
48190        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48191        let e = _mm512_set_ps(
48192            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48193        );
48194        assert_eq_m512(r, e);
48195    }
48196
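    // Truncating narrowing conversions (vpmovdw / vpmovdb): the low 16 (or 8)
    // bits of each element are kept, with no saturation. When the narrowed
    // result does not fill the __m128i destination, the unused upper bytes
    // are zeroed, which is why the `_mm_` and `_mm256_` expectations pad with
    // zeros.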
48197    #[simd_test(enable = "avx512f")]
48198    unsafe fn test_mm512_cvtepi32_epi16() {
48199        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48200        let r = _mm512_cvtepi32_epi16(a);
48201        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48202        assert_eq_m256i(r, e);
48203    }
48204
48205    #[simd_test(enable = "avx512f")]
48206    unsafe fn test_mm512_mask_cvtepi32_epi16() {
48207        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48208        let src = _mm256_set1_epi16(-1);
48209        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48210        assert_eq_m256i(r, src);
48211        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48212        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48213        assert_eq_m256i(r, e);
48214    }
48215
48216    #[simd_test(enable = "avx512f")]
48217    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48218        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48219        let r = _mm512_maskz_cvtepi32_epi16(0, a);
48220        assert_eq_m256i(r, _mm256_setzero_si256());
48221        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48222        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48223        assert_eq_m256i(r, e);
48224    }
48225
48226    #[simd_test(enable = "avx512f,avx512vl")]
48227    unsafe fn test_mm256_cvtepi32_epi16() {
48228        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48229        let r = _mm256_cvtepi32_epi16(a);
48230        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48231        assert_eq_m128i(r, e);
48232    }
48233
48234    #[simd_test(enable = "avx512f,avx512vl")]
48235    unsafe fn test_mm256_mask_cvtepi32_epi16() {
48236        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48237        let src = _mm_set1_epi16(-1);
48238        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48239        assert_eq_m128i(r, src);
48240        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48241        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48242        assert_eq_m128i(r, e);
48243    }
48244
48245    #[simd_test(enable = "avx512f,avx512vl")]
48246    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48247        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48248        let r = _mm256_maskz_cvtepi32_epi16(0, a);
48249        assert_eq_m128i(r, _mm_setzero_si128());
48250        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48251        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48252        assert_eq_m128i(r, e);
48253    }
48254
48255    #[simd_test(enable = "avx512f,avx512vl")]
48256    unsafe fn test_mm_cvtepi32_epi16() {
48257        let a = _mm_set_epi32(4, 5, 6, 7);
48258        let r = _mm_cvtepi32_epi16(a);
48259        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48260        assert_eq_m128i(r, e);
48261    }
48262
48263    #[simd_test(enable = "avx512f,avx512vl")]
48264    unsafe fn test_mm_mask_cvtepi32_epi16() {
48265        let a = _mm_set_epi32(4, 5, 6, 7);
48266        let src = _mm_set1_epi16(0);
48267        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48268        assert_eq_m128i(r, src);
48269        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48270        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48271        assert_eq_m128i(r, e);
48272    }
48273
48274    #[simd_test(enable = "avx512f,avx512vl")]
48275    unsafe fn test_mm_maskz_cvtepi32_epi16() {
48276        let a = _mm_set_epi32(4, 5, 6, 7);
48277        let r = _mm_maskz_cvtepi32_epi16(0, a);
48278        assert_eq_m128i(r, _mm_setzero_si128());
48279        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48280        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48281        assert_eq_m128i(r, e);
48282    }
48283
48284    #[simd_test(enable = "avx512f")]
48285    unsafe fn test_mm512_cvtepi32_epi8() {
48286        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48287        let r = _mm512_cvtepi32_epi8(a);
48288        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48289        assert_eq_m128i(r, e);
48290    }
48291
48292    #[simd_test(enable = "avx512f")]
48293    unsafe fn test_mm512_mask_cvtepi32_epi8() {
48294        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48295        let src = _mm_set1_epi8(-1);
48296        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48297        assert_eq_m128i(r, src);
48298        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48299        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48300        assert_eq_m128i(r, e);
48301    }
48302
48303    #[simd_test(enable = "avx512f")]
48304    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48305        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48306        let r = _mm512_maskz_cvtepi32_epi8(0, a);
48307        assert_eq_m128i(r, _mm_setzero_si128());
48308        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48309        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48310        assert_eq_m128i(r, e);
48311    }
48312
48313    #[simd_test(enable = "avx512f,avx512vl")]
48314    unsafe fn test_mm256_cvtepi32_epi8() {
48315        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48316        let r = _mm256_cvtepi32_epi8(a);
48317        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48318        assert_eq_m128i(r, e);
48319    }
48320
48321    #[simd_test(enable = "avx512f,avx512vl")]
48322    unsafe fn test_mm256_mask_cvtepi32_epi8() {
48323        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48324        let src = _mm_set1_epi8(0);
48325        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48326        assert_eq_m128i(r, src);
48327        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48328        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48329        assert_eq_m128i(r, e);
48330    }
48331
48332    #[simd_test(enable = "avx512f,avx512vl")]
48333    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48334        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48335        let r = _mm256_maskz_cvtepi32_epi8(0, a);
48336        assert_eq_m128i(r, _mm_setzero_si128());
48337        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48338        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48339        assert_eq_m128i(r, e);
48340    }
48341
48342    #[simd_test(enable = "avx512f,avx512vl")]
48343    unsafe fn test_mm_cvtepi32_epi8() {
48344        let a = _mm_set_epi32(4, 5, 6, 7);
48345        let r = _mm_cvtepi32_epi8(a);
48346        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48347        assert_eq_m128i(r, e);
48348    }
48349
48350    #[simd_test(enable = "avx512f,avx512vl")]
48351    unsafe fn test_mm_mask_cvtepi32_epi8() {
48352        let a = _mm_set_epi32(4, 5, 6, 7);
48353        let src = _mm_set1_epi8(0);
48354        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48355        assert_eq_m128i(r, src);
48356        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48357        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48358        assert_eq_m128i(r, e);
48359    }
48360
48361    #[simd_test(enable = "avx512f,avx512vl")]
48362    unsafe fn test_mm_maskz_cvtepi32_epi8() {
48363        let a = _mm_set_epi32(4, 5, 6, 7);
48364        let r = _mm_maskz_cvtepi32_epi8(0, a);
48365        assert_eq_m128i(r, _mm_setzero_si128());
48366        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48367        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48368        assert_eq_m128i(r, e);
48369    }
48370
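    // Saturating signed narrowing (vpmovsdw / vpmovsdb): out-of-range values
    // clamp to the bounds of the destination type, so i32::MIN and i32::MAX
    // become i16::MIN/i16::MAX (or i8::MIN/i8::MAX) rather than being
    // truncated bit-wise as in the vpmovdw tests above.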
48371    #[simd_test(enable = "avx512f")]
48372    unsafe fn test_mm512_cvtsepi32_epi16() {
48373        #[rustfmt::skip]
48374        let a = _mm512_set_epi32(
48375            0, 1, 2, 3,
48376            4, 5, 6, 7,
48377            8, 9, 10, 11,
48378            12, 13, i32::MIN, i32::MAX,
48379        );
48380        let r = _mm512_cvtsepi32_epi16(a);
48381        #[rustfmt::skip]
48382        let e = _mm256_set_epi16(
48383            0, 1, 2, 3,
48384            4, 5, 6, 7,
48385            8, 9, 10, 11,
48386            12, 13, i16::MIN, i16::MAX,
48387        );
48388        assert_eq_m256i(r, e);
48389    }
48390
48391    #[simd_test(enable = "avx512f")]
48392    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48393        #[rustfmt::skip]
48394        let a = _mm512_set_epi32(
48395            0, 1, 2, 3,
48396            4, 5, 6, 7,
48397            8, 9, 10, 11,
48398            12, 13, i32::MIN, i32::MAX,
48399        );
48400        let src = _mm256_set1_epi16(-1);
48401        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48402        assert_eq_m256i(r, src);
48403        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48404        #[rustfmt::skip]
48405        let e = _mm256_set_epi16(
48406            -1, -1, -1, -1,
48407            -1, -1, -1, -1,
48408            8, 9, 10, 11,
48409            12, 13, i16::MIN, i16::MAX,
48410        );
48411        assert_eq_m256i(r, e);
48412    }
48413
48414    #[simd_test(enable = "avx512f")]
48415    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48416        #[rustfmt::skip]
48417        let a = _mm512_set_epi32(
48418            0, 1, 2, 3,
48419            4, 5, 6, 7,
48420            8, 9, 10, 11,
48421            12, 13, i32::MIN, i32::MAX,
48422        );
48423        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48424        assert_eq_m256i(r, _mm256_setzero_si256());
48425        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48426        #[rustfmt::skip]
48427        let e = _mm256_set_epi16(
48428            0, 0, 0, 0,
48429            0, 0, 0, 0,
48430            8, 9, 10, 11,
48431            12, 13, i16::MIN, i16::MAX,
48432        );
48433        assert_eq_m256i(r, e);
48434    }
48435
48436    #[simd_test(enable = "avx512f,avx512vl")]
48437    unsafe fn test_mm256_cvtsepi32_epi16() {
48438        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48439        let r = _mm256_cvtsepi32_epi16(a);
48440        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48441        assert_eq_m128i(r, e);
48442    }
48443
48444    #[simd_test(enable = "avx512f,avx512vl")]
48445    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48446        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48447        let src = _mm_set1_epi16(-1);
48448        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48449        assert_eq_m128i(r, src);
48450        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48451        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48452        assert_eq_m128i(r, e);
48453    }
48454
48455    #[simd_test(enable = "avx512f,avx512vl")]
48456    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48457        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48458        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48459        assert_eq_m128i(r, _mm_setzero_si128());
48460        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48461        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48462        assert_eq_m128i(r, e);
48463    }
48464
48465    #[simd_test(enable = "avx512f,avx512vl")]
48466    unsafe fn test_mm_cvtsepi32_epi16() {
48467        let a = _mm_set_epi32(4, 5, 6, 7);
48468        let r = _mm_cvtsepi32_epi16(a);
48469        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48470        assert_eq_m128i(r, e);
48471    }
48472
48473    #[simd_test(enable = "avx512f,avx512vl")]
48474    unsafe fn test_mm_mask_cvtsepi32_epi16() {
48475        let a = _mm_set_epi32(4, 5, 6, 7);
48476        let src = _mm_set1_epi16(0);
48477        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48478        assert_eq_m128i(r, src);
48479        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
48480        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48481        assert_eq_m128i(r, e);
48482    }
48483
48484    #[simd_test(enable = "avx512f,avx512vl")]
48485    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48486        let a = _mm_set_epi32(4, 5, 6, 7);
48487        let r = _mm_maskz_cvtsepi32_epi16(0, a);
48488        assert_eq_m128i(r, _mm_setzero_si128());
48489        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
48490        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48491        assert_eq_m128i(r, e);
48492    }
48493
48494    #[simd_test(enable = "avx512f")]
48495    unsafe fn test_mm512_cvtsepi32_epi8() {
48496        #[rustfmt::skip]
48497        let a = _mm512_set_epi32(
48498            0, 1, 2, 3,
48499            4, 5, 6, 7,
48500            8, 9, 10, 11,
48501            12, 13, i32::MIN, i32::MAX,
48502        );
48503        let r = _mm512_cvtsepi32_epi8(a);
48504        #[rustfmt::skip]
48505        let e = _mm_set_epi8(
48506            0, 1, 2, 3,
48507            4, 5, 6, 7,
48508            8, 9, 10, 11,
48509            12, 13, i8::MIN, i8::MAX,
48510        );
48511        assert_eq_m128i(r, e);
48512    }
48513
48514    #[simd_test(enable = "avx512f")]
48515    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48516        #[rustfmt::skip]
48517        let a = _mm512_set_epi32(
48518            0, 1, 2, 3,
48519            4, 5, 6, 7,
48520            8, 9, 10, 11,
48521            12, 13, i32::MIN, i32::MAX,
48522        );
48523        let src = _mm_set1_epi8(-1);
48524        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48525        assert_eq_m128i(r, src);
48526        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48527        #[rustfmt::skip]
48528        let e = _mm_set_epi8(
48529            -1, -1, -1, -1,
48530            -1, -1, -1, -1,
48531            8, 9, 10, 11,
48532            12, 13, i8::MIN, i8::MAX,
48533        );
48534        assert_eq_m128i(r, e);
48535    }
48536
48537    #[simd_test(enable = "avx512f")]
48538    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48539        #[rustfmt::skip]
48540        let a = _mm512_set_epi32(
48541            0, 1, 2, 3,
48542            4, 5, 6, 7,
48543            8, 9, 10, 11,
48544            12, 13, i32::MIN, i32::MAX,
48545        );
48546        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48547        assert_eq_m128i(r, _mm_setzero_si128());
48548        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48549        #[rustfmt::skip]
48550        let e = _mm_set_epi8(
48551            0, 0, 0, 0,
48552            0, 0, 0, 0,
48553            8, 9, 10, 11,
48554            12, 13, i8::MIN, i8::MAX,
48555        );
48556        assert_eq_m128i(r, e);
48557    }
48558
48559    #[simd_test(enable = "avx512f,avx512vl")]
48560    unsafe fn test_mm256_cvtsepi32_epi8() {
48561        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48562        let r = _mm256_cvtsepi32_epi8(a);
48563        #[rustfmt::skip]
48564        let e = _mm_set_epi8(
48565            0, 0, 0, 0,
48566            0, 0, 0, 0,
48567            9, 10, 11, 12,
48568            13, 14, 15, 16,
48569        );
48570        assert_eq_m128i(r, e);
48571    }
48572
48573    #[simd_test(enable = "avx512f,avx512vl")]
48574    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48575        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48576        let src = _mm_set1_epi8(0);
48577        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48578        assert_eq_m128i(r, src);
48579        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48580        #[rustfmt::skip]
48581        let e = _mm_set_epi8(
48582            0, 0, 0, 0,
48583            0, 0, 0, 0,
48584            9, 10, 11, 12,
48585            13, 14, 15, 16,
48586        );
48587        assert_eq_m128i(r, e);
48588    }
48589
48590    #[simd_test(enable = "avx512f,avx512vl")]
48591    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48592        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48593        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48594        assert_eq_m128i(r, _mm_setzero_si128());
48595        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48596        #[rustfmt::skip]
48597        let e = _mm_set_epi8(
48598            0, 0, 0, 0,
48599            0, 0, 0, 0,
48600            9, 10, 11, 12,
48601            13, 14, 15, 16,
48602        );
48603        assert_eq_m128i(r, e);
48604    }
48605
48606    #[simd_test(enable = "avx512f,avx512vl")]
48607    unsafe fn test_mm_cvtsepi32_epi8() {
48608        let a = _mm_set_epi32(13, 14, 15, 16);
48609        let r = _mm_cvtsepi32_epi8(a);
48610        #[rustfmt::skip]
48611        let e = _mm_set_epi8(
48612            0, 0, 0, 0,
48613            0, 0, 0, 0,
48614            0, 0, 0, 0,
48615            13, 14, 15, 16,
48616        );
48617        assert_eq_m128i(r, e);
48618    }
48619
48620    #[simd_test(enable = "avx512f,avx512vl")]
48621    unsafe fn test_mm_mask_cvtsepi32_epi8() {
48622        let a = _mm_set_epi32(13, 14, 15, 16);
48623        let src = _mm_set1_epi8(0);
48624        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48625        assert_eq_m128i(r, src);
48626        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48627        #[rustfmt::skip]
48628        let e = _mm_set_epi8(
48629            0, 0, 0, 0,
48630            0, 0, 0, 0,
48631            0, 0, 0, 0,
48632            13, 14, 15, 16,
48633        );
48634        assert_eq_m128i(r, e);
48635    }
48636
48637    #[simd_test(enable = "avx512f,avx512vl")]
48638    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48639        let a = _mm_set_epi32(13, 14, 15, 16);
48640        let r = _mm_maskz_cvtsepi32_epi8(0, a);
48641        assert_eq_m128i(r, _mm_setzero_si128());
48642        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48643        #[rustfmt::skip]
48644        let e = _mm_set_epi8(
48645            0, 0, 0, 0,
48646            0, 0, 0, 0,
48647            0, 0, 0, 0,
48648            13, 14, 15, 16,
48649        );
48650        assert_eq_m128i(r, e);
48651    }
48652
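    // Saturating unsigned narrowing (vpmovusdw / vpmovusdb): the source is
    // treated as unsigned, so values above the destination range — including
    // i32::MIN, which is 2147483648 when viewed as u32, and i32::MAX —
    // saturate to u16::MAX or u8::MAX. Those all-ones patterns print as -1
    // through the signed `set_epi16`/`set_epi8` constructors used in the
    // expectations.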
48653    #[simd_test(enable = "avx512f")]
48654    unsafe fn test_mm512_cvtusepi32_epi16() {
48655        #[rustfmt::skip]
48656        let a = _mm512_set_epi32(
48657            0, 1, 2, 3,
48658            4, 5, 6, 7,
48659            8, 9, 10, 11,
48660            12, 13, i32::MIN, i32::MIN,
48661        );
48662        let r = _mm512_cvtusepi32_epi16(a);
48663        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48664        assert_eq_m256i(r, e);
48665    }
48666
48667    #[simd_test(enable = "avx512f")]
48668    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48669        #[rustfmt::skip]
48670        let a = _mm512_set_epi32(
48671            0, 1, 2, 3,
48672            4, 5, 6, 7,
48673            8, 9, 10, 11,
48674            12, 13, i32::MIN, i32::MIN,
48675        );
48676        let src = _mm256_set1_epi16(-1);
48677        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48678        assert_eq_m256i(r, src);
48679        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48680        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48681        assert_eq_m256i(r, e);
48682    }
48683
48684    #[simd_test(enable = "avx512f")]
48685    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48686        #[rustfmt::skip]
48687        let a = _mm512_set_epi32(
48688            0, 1, 2, 3,
48689            4, 5, 6, 7,
48690            8, 9, 10, 11,
48691            12, 13, i32::MIN, i32::MIN,
48692        );
48693        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48694        assert_eq_m256i(r, _mm256_setzero_si256());
48695        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48696        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48697        assert_eq_m256i(r, e);
48698    }
48699
48700    #[simd_test(enable = "avx512f,avx512vl")]
48701    unsafe fn test_mm256_cvtusepi32_epi16() {
48702        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48703        let r = _mm256_cvtusepi32_epi16(a);
48704        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48705        assert_eq_m128i(r, e);
48706    }
48707
48708    #[simd_test(enable = "avx512f,avx512vl")]
48709    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48710        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48711        let src = _mm_set1_epi16(0);
48712        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48713        assert_eq_m128i(r, src);
48714        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48715        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48716        assert_eq_m128i(r, e);
48717    }
48718
48719    #[simd_test(enable = "avx512f,avx512vl")]
48720    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48721        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48722        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48723        assert_eq_m128i(r, _mm_setzero_si128());
48724        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48725        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48726        assert_eq_m128i(r, e);
48727    }
48728
48729    #[simd_test(enable = "avx512f,avx512vl")]
48730    unsafe fn test_mm_cvtusepi32_epi16() {
48731        let a = _mm_set_epi32(5, 6, 7, 8);
48732        let r = _mm_cvtusepi32_epi16(a);
48733        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48734        assert_eq_m128i(r, e);
48735    }
48736
48737    #[simd_test(enable = "avx512f,avx512vl")]
48738    unsafe fn test_mm_mask_cvtusepi32_epi16() {
48739        let a = _mm_set_epi32(5, 6, 7, 8);
48740        let src = _mm_set1_epi16(0);
48741        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48742        assert_eq_m128i(r, src);
48743        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48744        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48745        assert_eq_m128i(r, e);
48746    }
48747
48748    #[simd_test(enable = "avx512f,avx512vl")]
48749    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48750        let a = _mm_set_epi32(5, 6, 7, 8);
48751        let r = _mm_maskz_cvtusepi32_epi16(0, a);
48752        assert_eq_m128i(r, _mm_setzero_si128());
48753        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48754        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48755        assert_eq_m128i(r, e);
48756    }
48757
48758    #[simd_test(enable = "avx512f")]
48759    unsafe fn test_mm512_cvtusepi32_epi8() {
48760        #[rustfmt::skip]
48761        let a = _mm512_set_epi32(
48762            0, 1, 2, 3,
48763            4, 5, 6, 7,
48764            8, 9, 10, 11,
48765            12, 13, i32::MIN, i32::MIN,
48766        );
48767        let r = _mm512_cvtusepi32_epi8(a);
48768        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48769        assert_eq_m128i(r, e);
48770    }
48771
48772    #[simd_test(enable = "avx512f")]
48773    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48774        #[rustfmt::skip]
48775        let a = _mm512_set_epi32(
48776            0, 1, 2, 3,
48777            4, 5, 6, 7,
48778            8, 9, 10, 11,
48779            12, 13, i32::MIN, i32::MIN,
48780        );
48781        let src = _mm_set1_epi8(-1);
48782        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48783        assert_eq_m128i(r, src);
48784        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48785        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48786        assert_eq_m128i(r, e);
48787    }
48788
48789    #[simd_test(enable = "avx512f")]
48790    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48791        #[rustfmt::skip]
48792        let a = _mm512_set_epi32(
48793            0, 1, 2, 3,
48794            4, 5, 6, 7,
48795            8, 9, 10, 11,
48796            12, 13, i32::MIN, i32::MIN,
48797        );
48798        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48799        assert_eq_m128i(r, _mm_setzero_si128());
48800        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48801        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48802        assert_eq_m128i(r, e);
48803    }
48804
48805    #[simd_test(enable = "avx512f,avx512vl")]
48806    unsafe fn test_mm256_cvtusepi32_epi8() {
48807        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48808        let r = _mm256_cvtusepi32_epi8(a);
48809        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48810        assert_eq_m128i(r, e);
48811    }
48812
48813    #[simd_test(enable = "avx512f,avx512vl")]
48814    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48815        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48816        let src = _mm_set1_epi8(0);
48817        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48818        assert_eq_m128i(r, src);
48819        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48820        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48821        assert_eq_m128i(r, e);
48822    }
48823
48824    #[simd_test(enable = "avx512f,avx512vl")]
48825    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48826        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48827        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48828        assert_eq_m128i(r, _mm_setzero_si128());
48829        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48830        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48831        assert_eq_m128i(r, e);
48832    }
48833
48834    #[simd_test(enable = "avx512f,avx512vl")]
48835    unsafe fn test_mm_cvtusepi32_epi8() {
48836        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48837        let r = _mm_cvtusepi32_epi8(a);
48838        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48839        assert_eq_m128i(r, e);
48840    }
48841
48842    #[simd_test(enable = "avx512f,avx512vl")]
48843    unsafe fn test_mm_mask_cvtusepi32_epi8() {
48844        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48845        let src = _mm_set1_epi8(0);
48846        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48847        assert_eq_m128i(r, src);
48848        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48849        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48850        assert_eq_m128i(r, e);
48851    }
48852
48853    #[simd_test(enable = "avx512f,avx512vl")]
48854    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48855        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48856        let r = _mm_maskz_cvtusepi32_epi8(0, a);
48857        assert_eq_m128i(r, _mm_setzero_si128());
48858        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48859        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48860        assert_eq_m128i(r, e);
48861    }
48862
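    // Explicit rounding-control conversions. _MM_FROUND_TO_NEAREST_INT rounds
    // half-way cases to even (-1.5 -> -2, 9.5 -> 10), _MM_FROUND_TO_NEG_INF
    // rounds toward negative infinity (9.5 -> 9), and _MM_FROUND_NO_EXC
    // suppresses floating-point exceptions during the conversion.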
48863    #[simd_test(enable = "avx512f")]
48864    unsafe fn test_mm512_cvt_roundps_epi32() {
48865        let a = _mm512_setr_ps(
48866            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48867        );
48868        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48869        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48870        assert_eq_m512i(r, e);
48871        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48872        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48873        assert_eq_m512i(r, e);
48874    }
48875
48876    #[simd_test(enable = "avx512f")]
48877    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48878        let a = _mm512_setr_ps(
48879            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48880        );
48881        let src = _mm512_set1_epi32(0);
48882        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48883            src, 0, a,
48884        );
48885        assert_eq_m512i(r, src);
48886        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48887            src,
48888            0b00000000_11111111,
48889            a,
48890        );
48891        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48892        assert_eq_m512i(r, e);
48893    }
48894
48895    #[simd_test(enable = "avx512f")]
48896    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48897        let a = _mm512_setr_ps(
48898            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48899        );
48900        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48901            0, a,
48902        );
48903        assert_eq_m512i(r, _mm512_setzero_si512());
48904        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48905            0b00000000_11111111,
48906            a,
48907        );
48908        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48909        assert_eq_m512i(r, e);
48910    }
48911
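    // For the unsigned conversions (vcvtps2udq), negative inputs are out of
    // range: with the invalid exception masked, the hardware returns the
    // all-ones value 2^32 - 1, which is why every negative lane is expected
    // to compare equal to -1 below.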
48912    #[simd_test(enable = "avx512f")]
48913    unsafe fn test_mm512_cvt_roundps_epu32() {
48914        let a = _mm512_setr_ps(
48915            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48916        );
48917        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48918        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48919        assert_eq_m512i(r, e);
48920        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48921        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48922        assert_eq_m512i(r, e);
48923    }
48924
48925    #[simd_test(enable = "avx512f")]
48926    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48927        let a = _mm512_setr_ps(
48928            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48929        );
48930        let src = _mm512_set1_epi32(0);
48931        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48932            src, 0, a,
48933        );
48934        assert_eq_m512i(r, src);
48935        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48936            src,
48937            0b00000000_11111111,
48938            a,
48939        );
48940        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48941        assert_eq_m512i(r, e);
48942    }
48943
48944    #[simd_test(enable = "avx512f")]
48945    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48946        let a = _mm512_setr_ps(
48947            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48948        );
48949        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48950            0, a,
48951        );
48952        assert_eq_m512i(r, _mm512_setzero_si512());
48953        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48954            0b00000000_11111111,
48955            a,
48956        );
48957        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48958        assert_eq_m512i(r, e);
48959    }
48960
48961    #[simd_test(enable = "avx512f")]
48962    unsafe fn test_mm512_cvt_roundepi32_ps() {
48963        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48964        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48965        let e = _mm512_setr_ps(
48966            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
48967        );
48968        assert_eq_m512(r, e);
48969    }
48970
48971    #[simd_test(enable = "avx512f")]
48972    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
48973        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48974        let src = _mm512_set1_ps(0.);
48975        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48976            src, 0, a,
48977        );
48978        assert_eq_m512(r, src);
48979        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48980            src,
48981            0b00000000_11111111,
48982            a,
48983        );
48984        let e = _mm512_setr_ps(
48985            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
48986        );
48987        assert_eq_m512(r, e);
48988    }
48989
48990    #[simd_test(enable = "avx512f")]
48991    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
48992        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48993        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48994            0, a,
48995        );
48996        assert_eq_m512(r, _mm512_setzero_ps());
48997        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48998            0b00000000_11111111,
48999            a,
49000        );
49001        let e = _mm512_setr_ps(
49002            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49003        );
49004        assert_eq_m512(r, e);
49005    }
49006
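    // When -2i32 is reinterpreted as u32 it becomes 4294967294, which is not
    // representable in f32; the nearest f32 is 2^32 = 4294967296.0. The
    // literal 4294967300. in the expectations parses to that same f32 value.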
49007    #[simd_test(enable = "avx512f")]
49008    unsafe fn test_mm512_cvt_roundepu32_ps() {
49009        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49010        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49011        #[rustfmt::skip]
49012        let e = _mm512_setr_ps(
49013            0., 4294967300., 2., 4294967300.,
49014            4., 4294967300., 6., 4294967300.,
49015            8., 10., 10., 12.,
49016            12., 14., 14., 16.,
49017        );
49018        assert_eq_m512(r, e);
49019    }
49020
49021    #[simd_test(enable = "avx512f")]
49022    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49023        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49024        let src = _mm512_set1_ps(0.);
49025        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49026            src, 0, a,
49027        );
49028        assert_eq_m512(r, src);
49029        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49030            src,
49031            0b00000000_11111111,
49032            a,
49033        );
49034        #[rustfmt::skip]
49035        let e = _mm512_setr_ps(
49036            0., 4294967300., 2., 4294967300.,
49037            4., 4294967300., 6., 4294967300.,
49038            0., 0., 0., 0.,
49039            0., 0., 0., 0.,
49040        );
49041        assert_eq_m512(r, e);
49042    }
49043
49044    #[simd_test(enable = "avx512f")]
49045    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49046        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49047        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49048            0, a,
49049        );
49050        assert_eq_m512(r, _mm512_setzero_ps());
49051        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49052            0b00000000_11111111,
49053            a,
49054        );
49055        #[rustfmt::skip]
49056        let e = _mm512_setr_ps(
49057            0., 4294967300., 2., 4294967300.,
49058            4., 4294967300., 6., 4294967300.,
49059            0., 0., 0., 0.,
49060            0., 0., 0., 0.,
49061        );
49062        assert_eq_m512(r, e);
49063    }
49064
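    // f32 -> f16 conversions (vcvtps2ph) pack half-precision results into the
    // integer destination. 1.0f16 has the bit pattern 0x3C00 (sign 0, biased
    // exponent 15, zero mantissa), so four copies per 64-bit lane give
    // 0x3C00_3C00_3C00_3C00 = 4323521613979991040, the literal used by all of
    // the ps -> ph tests below. An illustrative helper (an editorial sketch,
    // not part of the upstream suite) showing how that literal is assembled:
    #[allow(dead_code)]
    const fn splat4_f16(bits: u16) -> i64 {
        // Replicate one 16-bit pattern into all four slots of a 64-bit lane;
        // splat4_f16(0x3C00) == 4323521613979991040.
        let b = bits as u64;
        (b | b << 16 | b << 32 | b << 48) as i64
    }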
49065    #[simd_test(enable = "avx512f")]
49066    unsafe fn test_mm512_cvt_roundps_ph() {
49067        let a = _mm512_set1_ps(1.);
49068        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
49069        let e = _mm256_setr_epi64x(
49070            4323521613979991040,
49071            4323521613979991040,
49072            4323521613979991040,
49073            4323521613979991040,
49074        );
49075        assert_eq_m256i(r, e);
49076    }
49077
49078    #[simd_test(enable = "avx512f")]
49079    unsafe fn test_mm512_mask_cvt_roundps_ph() {
49080        let a = _mm512_set1_ps(1.);
49081        let src = _mm256_set1_epi16(0);
49082        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49083        assert_eq_m256i(r, src);
49084        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49085        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49086        assert_eq_m256i(r, e);
49087    }
49088
49089    #[simd_test(enable = "avx512f")]
49090    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49091        let a = _mm512_set1_ps(1.);
49092        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49093        assert_eq_m256i(r, _mm256_setzero_si256());
49094        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49095        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49096        assert_eq_m256i(r, e);
49097    }
49098
49099    #[simd_test(enable = "avx512f,avx512vl")]
49100    unsafe fn test_mm256_mask_cvt_roundps_ph() {
49101        let a = _mm256_set1_ps(1.);
49102        let src = _mm_set1_epi16(0);
49103        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49104        assert_eq_m128i(r, src);
49105        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49106        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49107        assert_eq_m128i(r, e);
49108    }
49109
49110    #[simd_test(enable = "avx512f,avx512vl")]
49111    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49112        let a = _mm256_set1_ps(1.);
49113        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49114        assert_eq_m128i(r, _mm_setzero_si128());
49115        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49116        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49117        assert_eq_m128i(r, e);
49118    }
49119
49120    #[simd_test(enable = "avx512f,avx512vl")]
49121    unsafe fn test_mm_mask_cvt_roundps_ph() {
49122        let a = _mm_set1_ps(1.);
49123        let src = _mm_set1_epi16(0);
49124        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49125        assert_eq_m128i(r, src);
49126        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49127        let e = _mm_setr_epi64x(4323521613979991040, 0);
49128        assert_eq_m128i(r, e);
49129    }
49130
49131    #[simd_test(enable = "avx512f,avx512vl")]
49132    unsafe fn test_mm_maskz_cvt_roundps_ph() {
49133        let a = _mm_set1_ps(1.);
49134        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49135        assert_eq_m128i(r, _mm_setzero_si128());
49136        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49137        let e = _mm_setr_epi64x(4323521613979991040, 0);
49138        assert_eq_m128i(r, e);
49139    }
49140
49141    #[simd_test(enable = "avx512f")]
49142    unsafe fn test_mm512_cvtps_ph() {
49143        let a = _mm512_set1_ps(1.);
49144        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49145        let e = _mm256_setr_epi64x(
49146            4323521613979991040,
49147            4323521613979991040,
49148            4323521613979991040,
49149            4323521613979991040,
49150        );
49151        assert_eq_m256i(r, e);
49152    }
49153
49154    #[simd_test(enable = "avx512f")]
49155    unsafe fn test_mm512_mask_cvtps_ph() {
49156        let a = _mm512_set1_ps(1.);
49157        let src = _mm256_set1_epi16(0);
49158        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49159        assert_eq_m256i(r, src);
49160        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49161        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49162        assert_eq_m256i(r, e);
49163    }
49164
49165    #[simd_test(enable = "avx512f")]
49166    unsafe fn test_mm512_maskz_cvtps_ph() {
49167        let a = _mm512_set1_ps(1.);
49168        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49169        assert_eq_m256i(r, _mm256_setzero_si256());
49170        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49171        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49172        assert_eq_m256i(r, e);
49173    }
49174
49175    #[simd_test(enable = "avx512f,avx512vl")]
49176    unsafe fn test_mm256_mask_cvtps_ph() {
49177        let a = _mm256_set1_ps(1.);
49178        let src = _mm_set1_epi16(0);
49179        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49180        assert_eq_m128i(r, src);
49181        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49182        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49183        assert_eq_m128i(r, e);
49184    }
49185
49186    #[simd_test(enable = "avx512f,avx512vl")]
49187    unsafe fn test_mm256_maskz_cvtps_ph() {
49188        let a = _mm256_set1_ps(1.);
49189        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49190        assert_eq_m128i(r, _mm_setzero_si128());
49191        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49192        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49193        assert_eq_m128i(r, e);
49194    }
49195
49196    #[simd_test(enable = "avx512f,avx512vl")]
49197    unsafe fn test_mm_mask_cvtps_ph() {
49198        let a = _mm_set1_ps(1.);
49199        let src = _mm_set1_epi16(0);
49200        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49201        assert_eq_m128i(r, src);
49202        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49203        let e = _mm_setr_epi64x(4323521613979991040, 0);
49204        assert_eq_m128i(r, e);
49205    }
49206
49207    #[simd_test(enable = "avx512f,avx512vl")]
49208    unsafe fn test_mm_maskz_cvtps_ph() {
49209        let a = _mm_set1_ps(1.);
49210        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49211        assert_eq_m128i(r, _mm_setzero_si128());
49212        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49213        let e = _mm_setr_epi64x(4323521613979991040, 0);
49214        assert_eq_m128i(r, e);
49215    }
49216
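    // The inverse f16 -> f32 conversions (vcvtph2ps) decode the same 0x3C00
    // patterns back to 1.0f32.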
49217    #[simd_test(enable = "avx512f")]
49218    unsafe fn test_mm512_cvt_roundph_ps() {
49219        let a = _mm256_setr_epi64x(
49220            4323521613979991040,
49221            4323521613979991040,
49222            4323521613979991040,
49223            4323521613979991040,
49224        );
49225        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49226        let e = _mm512_set1_ps(1.);
49227        assert_eq_m512(r, e);
49228    }
49229
49230    #[simd_test(enable = "avx512f")]
49231    unsafe fn test_mm512_mask_cvt_roundph_ps() {
49232        let a = _mm256_setr_epi64x(
49233            4323521613979991040,
49234            4323521613979991040,
49235            4323521613979991040,
49236            4323521613979991040,
49237        );
49238        let src = _mm512_set1_ps(0.);
49239        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49240        assert_eq_m512(r, src);
49241        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49242        let e = _mm512_setr_ps(
49243            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49244        );
49245        assert_eq_m512(r, e);
49246    }
49247
49248    #[simd_test(enable = "avx512f")]
49249    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49250        let a = _mm256_setr_epi64x(
49251            4323521613979991040,
49252            4323521613979991040,
49253            4323521613979991040,
49254            4323521613979991040,
49255        );
49256        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49257        assert_eq_m512(r, _mm512_setzero_ps());
49258        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49259        let e = _mm512_setr_ps(
49260            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49261        );
49262        assert_eq_m512(r, e);
49263    }
49264
49265    #[simd_test(enable = "avx512f")]
49266    unsafe fn test_mm512_cvtph_ps() {
49267        let a = _mm256_setr_epi64x(
49268            4323521613979991040,
49269            4323521613979991040,
49270            4323521613979991040,
49271            4323521613979991040,
49272        );
49273        let r = _mm512_cvtph_ps(a);
49274        let e = _mm512_set1_ps(1.);
49275        assert_eq_m512(r, e);
49276    }
49277
49278    #[simd_test(enable = "avx512f")]
49279    unsafe fn test_mm512_mask_cvtph_ps() {
49280        let a = _mm256_setr_epi64x(
49281            4323521613979991040,
49282            4323521613979991040,
49283            4323521613979991040,
49284            4323521613979991040,
49285        );
49286        let src = _mm512_set1_ps(0.);
49287        let r = _mm512_mask_cvtph_ps(src, 0, a);
49288        assert_eq_m512(r, src);
49289        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49290        let e = _mm512_setr_ps(
49291            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49292        );
49293        assert_eq_m512(r, e);
49294    }
49295
49296    #[simd_test(enable = "avx512f")]
49297    unsafe fn test_mm512_maskz_cvtph_ps() {
49298        let a = _mm256_setr_epi64x(
49299            4323521613979991040,
49300            4323521613979991040,
49301            4323521613979991040,
49302            4323521613979991040,
49303        );
49304        let r = _mm512_maskz_cvtph_ps(0, a);
49305        assert_eq_m512(r, _mm512_setzero_ps());
49306        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49307        let e = _mm512_setr_ps(
49308            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49309        );
49310        assert_eq_m512(r, e);
49311    }
49312
49313    #[simd_test(enable = "avx512f,avx512vl")]
49314    unsafe fn test_mm256_mask_cvtph_ps() {
49315        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49316        let src = _mm256_set1_ps(0.);
49317        let r = _mm256_mask_cvtph_ps(src, 0, a);
49318        assert_eq_m256(r, src);
49319        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49320        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49321        assert_eq_m256(r, e);
49322    }
49323
49324    #[simd_test(enable = "avx512f,avx512vl")]
49325    unsafe fn test_mm256_maskz_cvtph_ps() {
49326        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49327        let r = _mm256_maskz_cvtph_ps(0, a);
49328        assert_eq_m256(r, _mm256_setzero_ps());
49329        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49330        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49331        assert_eq_m256(r, e);
49332    }
49333
49334    #[simd_test(enable = "avx512f,avx512vl")]
49335    unsafe fn test_mm_mask_cvtph_ps() {
49336        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49337        let src = _mm_set1_ps(0.);
49338        let r = _mm_mask_cvtph_ps(src, 0, a);
49339        assert_eq_m128(r, src);
49340        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49341        let e = _mm_setr_ps(1., 1., 1., 1.);
49342        assert_eq_m128(r, e);
49343    }
49344
49345    #[simd_test(enable = "avx512f,avx512vl")]
49346    unsafe fn test_mm_maskz_cvtph_ps() {
49347        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49348        let r = _mm_maskz_cvtph_ps(0, a);
49349        assert_eq_m128(r, _mm_setzero_ps());
49350        let r = _mm_maskz_cvtph_ps(0b00001111, a);
49351        let e = _mm_setr_ps(1., 1., 1., 1.);
49352        assert_eq_m128(r, e);
49353    }
49354
49355    #[simd_test(enable = "avx512f")]
49356    unsafe fn test_mm512_cvtt_roundps_epi32() {
49357        let a = _mm512_setr_ps(
49358            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49359        );
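        // cvtt* truncates toward zero (-1.5 -> -1, 9.5 -> 9);
        // _MM_FROUND_NO_EXC suppresses floating-point exceptions (SAE).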
49360        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49361        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49362        assert_eq_m512i(r, e);
49363    }
49364
49365    #[simd_test(enable = "avx512f")]
49366    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49367        let a = _mm512_setr_ps(
49368            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49369        );
49370        let src = _mm512_set1_epi32(0);
49371        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49372        assert_eq_m512i(r, src);
49373        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49374        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49375        assert_eq_m512i(r, e);
49376    }
49377
49378    #[simd_test(enable = "avx512f")]
49379    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49380        let a = _mm512_setr_ps(
49381            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49382        );
49383        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49384        assert_eq_m512i(r, _mm512_setzero_si512());
49385        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49386        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49387        assert_eq_m512i(r, e);
49388    }
49389
49390    #[simd_test(enable = "avx512f")]
49391    unsafe fn test_mm512_cvtt_roundps_epu32() {
49392        let a = _mm512_setr_ps(
49393            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49394        );
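        // Negative inputs are out of range for u32, so the conversion yields
        // the sentinel 0xFFFFFFFF, which reads back as -1 in the epi32
        // expected vector.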
49395        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49396        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49397        assert_eq_m512i(r, e);
49398    }
49399
49400    #[simd_test(enable = "avx512f")]
49401    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49402        let a = _mm512_setr_ps(
49403            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49404        );
49405        let src = _mm512_set1_epi32(0);
49406        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49407        assert_eq_m512i(r, src);
49408        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49409        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49410        assert_eq_m512i(r, e);
49411    }
49412
49413    #[simd_test(enable = "avx512f")]
49414    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49415        let a = _mm512_setr_ps(
49416            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49417        );
49418        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49419        assert_eq_m512i(r, _mm512_setzero_si512());
49420        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49421        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49422        assert_eq_m512i(r, e);
49423    }
49424
49425    #[simd_test(enable = "avx512f")]
49426    unsafe fn test_mm512_cvttps_epi32() {
49427        let a = _mm512_setr_ps(
49428            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49429        );
49430        let r = _mm512_cvttps_epi32(a);
49431        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49432        assert_eq_m512i(r, e);
49433    }
49434
49435    #[simd_test(enable = "avx512f")]
49436    unsafe fn test_mm512_mask_cvttps_epi32() {
49437        let a = _mm512_setr_ps(
49438            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49439        );
49440        let src = _mm512_set1_epi32(0);
49441        let r = _mm512_mask_cvttps_epi32(src, 0, a);
49442        assert_eq_m512i(r, src);
49443        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49444        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49445        assert_eq_m512i(r, e);
49446    }
49447
49448    #[simd_test(enable = "avx512f")]
49449    unsafe fn test_mm512_maskz_cvttps_epi32() {
49450        let a = _mm512_setr_ps(
49451            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49452        );
49453        let r = _mm512_maskz_cvttps_epi32(0, a);
49454        assert_eq_m512i(r, _mm512_setzero_si512());
49455        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49456        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49457        assert_eq_m512i(r, e);
49458    }
49459
49460    #[simd_test(enable = "avx512f,avx512vl")]
49461    unsafe fn test_mm256_mask_cvttps_epi32() {
49462        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49463        let src = _mm256_set1_epi32(0);
49464        let r = _mm256_mask_cvttps_epi32(src, 0, a);
49465        assert_eq_m256i(r, src);
49466        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49467        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49468        assert_eq_m256i(r, e);
49469    }
49470
49471    #[simd_test(enable = "avx512f,avx512vl")]
49472    unsafe fn test_mm256_maskz_cvttps_epi32() {
49473        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49474        let r = _mm256_maskz_cvttps_epi32(0, a);
49475        assert_eq_m256i(r, _mm256_setzero_si256());
49476        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49477        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49478        assert_eq_m256i(r, e);
49479    }
49480
49481    #[simd_test(enable = "avx512f,avx512vl")]
49482    unsafe fn test_mm_mask_cvttps_epi32() {
49483        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49484        let src = _mm_set1_epi32(0);
49485        let r = _mm_mask_cvttps_epi32(src, 0, a);
49486        assert_eq_m128i(r, src);
49487        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49488        let e = _mm_set_epi32(12, 13, 14, 15);
49489        assert_eq_m128i(r, e);
49490    }
49491
49492    #[simd_test(enable = "avx512f,avx512vl")]
49493    unsafe fn test_mm_maskz_cvttps_epi32() {
49494        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49495        let r = _mm_maskz_cvttps_epi32(0, a);
49496        assert_eq_m128i(r, _mm_setzero_si128());
49497        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49498        let e = _mm_set_epi32(12, 13, 14, 15);
49499        assert_eq_m128i(r, e);
49500    }
49501
49502    #[simd_test(enable = "avx512f")]
49503    unsafe fn test_mm512_cvttps_epu32() {
49504        let a = _mm512_setr_ps(
49505            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49506        );
49507        let r = _mm512_cvttps_epu32(a);
49508        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49509        assert_eq_m512i(r, e);
49510    }
49511
49512    #[simd_test(enable = "avx512f")]
49513    unsafe fn test_mm512_mask_cvttps_epu32() {
49514        let a = _mm512_setr_ps(
49515            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49516        );
49517        let src = _mm512_set1_epi32(0);
49518        let r = _mm512_mask_cvttps_epu32(src, 0, a);
49519        assert_eq_m512i(r, src);
49520        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49521        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49522        assert_eq_m512i(r, e);
49523    }
49524
49525    #[simd_test(enable = "avx512f")]
49526    unsafe fn test_mm512_maskz_cvttps_epu32() {
49527        let a = _mm512_setr_ps(
49528            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49529        );
49530        let r = _mm512_maskz_cvttps_epu32(0, a);
49531        assert_eq_m512i(r, _mm512_setzero_si512());
49532        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49533        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49534        assert_eq_m512i(r, e);
49535    }
49536
49537    #[simd_test(enable = "avx512f,avx512vl")]
49538    unsafe fn test_mm256_cvttps_epu32() {
49539        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49540        let r = _mm256_cvttps_epu32(a);
49541        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49542        assert_eq_m256i(r, e);
49543    }
49544
49545    #[simd_test(enable = "avx512f,avx512vl")]
49546    unsafe fn test_mm256_mask_cvttps_epu32() {
49547        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49548        let src = _mm256_set1_epi32(0);
49549        let r = _mm256_mask_cvttps_epu32(src, 0, a);
49550        assert_eq_m256i(r, src);
49551        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49552        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49553        assert_eq_m256i(r, e);
49554    }
49555
49556    #[simd_test(enable = "avx512f,avx512vl")]
49557    unsafe fn test_mm256_maskz_cvttps_epu32() {
49558        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49559        let r = _mm256_maskz_cvttps_epu32(0, a);
49560        assert_eq_m256i(r, _mm256_setzero_si256());
49561        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49562        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49563        assert_eq_m256i(r, e);
49564    }
49565
49566    #[simd_test(enable = "avx512f,avx512vl")]
49567    unsafe fn test_mm_cvttps_epu32() {
49568        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49569        let r = _mm_cvttps_epu32(a);
49570        let e = _mm_set_epi32(12, 13, 14, 15);
49571        assert_eq_m128i(r, e);
49572    }
49573
49574    #[simd_test(enable = "avx512f,avx512vl")]
49575    unsafe fn test_mm_mask_cvttps_epu32() {
49576        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49577        let src = _mm_set1_epi32(0);
49578        let r = _mm_mask_cvttps_epu32(src, 0, a);
49579        assert_eq_m128i(r, src);
49580        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49581        let e = _mm_set_epi32(12, 13, 14, 15);
49582        assert_eq_m128i(r, e);
49583    }
49584
49585    #[simd_test(enable = "avx512f,avx512vl")]
49586    unsafe fn test_mm_maskz_cvttps_epu32() {
49587        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49588        let r = _mm_maskz_cvttps_epu32(0, a);
49589        assert_eq_m128i(r, _mm_setzero_si128());
49590        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49591        let e = _mm_set_epi32(12, 13, 14, 15);
49592        assert_eq_m128i(r, e);
49593    }
49594
49595    #[simd_test(enable = "avx512f")]
49596    unsafe fn test_mm512_i32gather_ps() {
49597        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49598        // A scale of 4 means each 32-bit index addresses 4-byte elements
49599        #[rustfmt::skip]
49600        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49601                                      120, 128, 136, 144, 152, 160, 168, 176);
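        // Each lane j loads from base + index[j] * 4 bytes, so index 16 reads arr[16].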
49602        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
49603        #[rustfmt::skip]
49604        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49605                                         120., 128., 136., 144., 152., 160., 168., 176.));
49606    }
49607
49608    #[simd_test(enable = "avx512f")]
49609    unsafe fn test_mm512_mask_i32gather_ps() {
49610        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49611        let src = _mm512_set1_ps(2.);
49612        let mask = 0b10101010_10101010;
49613        #[rustfmt::skip]
49614        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49615                                      120, 128, 136, 144, 152, 160, 168, 176);
49616        // A scale of 4 means each 32-bit index addresses 4-byte elements
49617        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
49618        #[rustfmt::skip]
49619        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49620                                         2., 128., 2., 144., 2., 160., 2., 176.));
49621    }
49622
49623    #[simd_test(enable = "avx512f")]
49624    unsafe fn test_mm512_i32gather_epi32() {
49625        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49626        // A scale of 4 means each 32-bit index addresses 4-byte elements
49627        #[rustfmt::skip]
49628        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49629                                      120, 128, 136, 144, 152, 160, 168, 176);
49630        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
49631        #[rustfmt::skip]
49632        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49633                                             120, 128, 136, 144, 152, 160, 168, 176));
49634    }
49635
49636    #[simd_test(enable = "avx512f")]
49637    unsafe fn test_mm512_mask_i32gather_epi32() {
49638        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49639        let src = _mm512_set1_epi32(2);
49640        let mask = 0b10101010_10101010;
49641        let index = _mm512_setr_epi32(
49642            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49643        );
49644        // A scale of 4 means each 32-bit index addresses 4-byte elements
49645        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
49646        assert_eq_m512i(
49647            r,
49648            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49649        );
49650    }
49651
49652    #[simd_test(enable = "avx512f")]
49653    unsafe fn test_mm512_i32scatter_ps() {
49654        let mut arr = [0f32; 256];
49655        #[rustfmt::skip]
49656        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49657                                      128, 144, 160, 176, 192, 208, 224, 240);
49658        let src = _mm512_setr_ps(
49659            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49660        );
49661        // A scale of 4 means each 32-bit index addresses 4-byte elements
49662        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
49663        let mut expected = [0f32; 256];
49664        for i in 0..16 {
49665            expected[i * 16] = (i + 1) as f32;
49666        }
49667        assert_eq!(&arr[..], &expected[..]);
49668    }
49669
49670    #[simd_test(enable = "avx512f")]
49671    unsafe fn test_mm512_mask_i32scatter_ps() {
49672        let mut arr = [0f32; 256];
49673        let mask = 0b10101010_10101010;
49674        #[rustfmt::skip]
49675        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49676                                      128, 144, 160, 176, 192, 208, 224, 240);
49677        let src = _mm512_setr_ps(
49678            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49679        );
49680        // A scale of 4 means each 32-bit index addresses 4-byte elements
49681        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
49682        let mut expected = [0f32; 256];
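        // Only lanes with a set mask bit (the odd lanes 1, 3, ..., 15) are
        // stored: lane 2*i+1 writes src value 2*(i+1) to arr[32*i + 16].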
49683        for i in 0..8 {
49684            expected[i * 32 + 16] = 2. * (i + 1) as f32;
49685        }
49686        assert_eq!(&arr[..], &expected[..]);
49687    }
49688
49689    #[simd_test(enable = "avx512f")]
49690    unsafe fn test_mm512_i32scatter_epi32() {
49691        let mut arr = [0i32; 256];
49692        #[rustfmt::skip]
49694        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49695                                      128, 144, 160, 176, 192, 208, 224, 240);
49696        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49697        // A scale of 4 means each 32-bit index addresses 4-byte elements
49698        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
49699        let mut expected = [0i32; 256];
49700        for i in 0..16 {
49701            expected[i * 16] = (i + 1) as i32;
49702        }
49703        assert_eq!(&arr[..], &expected[..]);
49704    }
49705
49706    #[simd_test(enable = "avx512f")]
49707    unsafe fn test_mm512_mask_i32scatter_epi32() {
49708        let mut arr = [0i32; 256];
49709        let mask = 0b10101010_10101010;
49710        #[rustfmt::skip]
49711        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49712                                      128, 144, 160, 176, 192, 208, 224, 240);
49713        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49714        // A scale of 4 means each 32-bit index addresses 4-byte elements
49715        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
49716        let mut expected = [0i32; 256];
49717        for i in 0..8 {
49718            expected[i * 32 + 16] = 2 * (i + 1) as i32;
49719        }
49720        assert_eq!(&arr[..], &expected[..]);
49721    }
49722
49723    #[simd_test(enable = "avx512f")]
49724    unsafe fn test_mm512_cmplt_ps_mask() {
49725        #[rustfmt::skip]
49726        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49727                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49728        let b = _mm512_set1_ps(-1.);
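        // _mm512_set_ps lists lanes high-to-low, so the last argument is lane 0
        // (mask bit 0). Only f32::MIN and -100. are < -1.; NAN compares false.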
49729        let m = _mm512_cmplt_ps_mask(a, b);
49730        assert_eq!(m, 0b00000101_00000101);
49731    }
49732
49733    #[simd_test(enable = "avx512f")]
49734    unsafe fn test_mm512_mask_cmplt_ps_mask() {
49735        #[rustfmt::skip]
49736        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49737                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49738        let b = _mm512_set1_ps(-1.);
49739        let mask = 0b01100110_01100110;
49740        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49741        assert_eq!(r, 0b00000100_00000100);
49742    }
49743
49744    #[simd_test(enable = "avx512f")]
49745    unsafe fn test_mm512_cmpnlt_ps_mask() {
49746        #[rustfmt::skip]
49747        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49748                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49749        let b = _mm512_set1_ps(-1.);
49750        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49751    }
49752
49753    #[simd_test(enable = "avx512f")]
49754    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49755        #[rustfmt::skip]
49756        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49757                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49758        let b = _mm512_set1_ps(-1.);
49759        let mask = 0b01111010_01111010;
49760        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49761    }
49762
49763    #[simd_test(enable = "avx512f")]
49764    unsafe fn test_mm512_cmpnle_ps_mask() {
49765        #[rustfmt::skip]
49766        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49767                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49768        let b = _mm512_set1_ps(-1.);
49769        let m = _mm512_cmpnle_ps_mask(b, a);
49770        assert_eq!(m, 0b00001101_00001101);
49771    }
49772
49773    #[simd_test(enable = "avx512f")]
49774    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49775        #[rustfmt::skip]
49776        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49777                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49778        let b = _mm512_set1_ps(-1.);
49779        let mask = 0b01100110_01100110;
49780        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49781        assert_eq!(r, 0b00000100_00000100);
49782    }
49783
49784    #[simd_test(enable = "avx512f")]
49785    unsafe fn test_mm512_cmple_ps_mask() {
49786        #[rustfmt::skip]
49787        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49788                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49789        let b = _mm512_set1_ps(-1.);
49790        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49791    }
49792
49793    #[simd_test(enable = "avx512f")]
49794    unsafe fn test_mm512_mask_cmple_ps_mask() {
49795        #[rustfmt::skip]
49796        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49797                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49798        let b = _mm512_set1_ps(-1.);
49799        let mask = 0b01111010_01111010;
49800        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49801    }
49802
49803    #[simd_test(enable = "avx512f")]
49804    unsafe fn test_mm512_cmpeq_ps_mask() {
49805        #[rustfmt::skip]
49806        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49807                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49808        #[rustfmt::skip]
49809        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49810                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49811        let m = _mm512_cmpeq_ps_mask(b, a);
49812        assert_eq!(m, 0b11001101_11001101);
49813    }
49814
49815    #[simd_test(enable = "avx512f")]
49816    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49817        #[rustfmt::skip]
49818        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49819                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49820        #[rustfmt::skip]
49821        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49822                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49823        let mask = 0b01111010_01111010;
49824        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49825        assert_eq!(r, 0b01001000_01001000);
49826    }
49827
49828    #[simd_test(enable = "avx512f")]
49829    unsafe fn test_mm512_cmpneq_ps_mask() {
49830        #[rustfmt::skip]
49831        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49832                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49833        #[rustfmt::skip]
49834        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49835                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49836        let m = _mm512_cmpneq_ps_mask(b, a);
49837        assert_eq!(m, 0b00110010_00110010);
49838    }
49839
49840    #[simd_test(enable = "avx512f")]
49841    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49842        #[rustfmt::skip]
49843        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49844                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49845        #[rustfmt::skip]
49846        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49847                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49848        let mask = 0b01111010_01111010;
49849        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49850        assert_eq!(r, 0b00110010_00110010)
49851    }
49852
49853    #[simd_test(enable = "avx512f")]
49854    unsafe fn test_mm512_cmp_ps_mask() {
49855        #[rustfmt::skip]
49856        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49857                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49858        let b = _mm512_set1_ps(-1.);
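        // _CMP_LT_OQ is an ordered, quiet (non-signaling) less-than:
        // unordered inputs would yield a 0 mask bit without signaling.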
49859        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49860        assert_eq!(m, 0b00000101_00000101);
49861    }
49862
49863    #[simd_test(enable = "avx512f")]
49864    unsafe fn test_mm512_mask_cmp_ps_mask() {
49865        #[rustfmt::skip]
49866        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49867                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49868        let b = _mm512_set1_ps(-1.);
49869        let mask = 0b01100110_01100110;
49870        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49871        assert_eq!(r, 0b00000100_00000100);
49872    }
49873
49874    #[simd_test(enable = "avx512f,avx512vl")]
49875    unsafe fn test_mm256_cmp_ps_mask() {
49876        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49877        let b = _mm256_set1_ps(-1.);
49878        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49879        assert_eq!(m, 0b00000101);
49880    }
49881
49882    #[simd_test(enable = "avx512f,avx512vl")]
49883    unsafe fn test_mm256_mask_cmp_ps_mask() {
49884        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49885        let b = _mm256_set1_ps(-1.);
49886        let mask = 0b01100110;
49887        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49888        assert_eq!(r, 0b00000100);
49889    }
49890
49891    #[simd_test(enable = "avx512f,avx512vl")]
49892    unsafe fn test_mm_cmp_ps_mask() {
49893        let a = _mm_set_ps(0., 1., -1., 13.);
49894        let b = _mm_set1_ps(1.);
49895        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49896        assert_eq!(m, 0b00001010);
49897    }
49898
49899    #[simd_test(enable = "avx512f,avx512vl")]
49900    unsafe fn test_mm_mask_cmp_ps_mask() {
49901        let a = _mm_set_ps(0., 1., -1., 13.);
49902        let b = _mm_set1_ps(1.);
49903        let mask = 0b11111111;
49904        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49905        assert_eq!(r, 0b00001010);
49906    }
49907
49908    #[simd_test(enable = "avx512f")]
49909    unsafe fn test_mm512_cmp_round_ps_mask() {
49910        #[rustfmt::skip]
49911        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49912                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49913        let b = _mm512_set1_ps(-1.);
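        // Comparisons do not round; the rounding parameter here only selects
        // exception behavior (_MM_FROUND_CUR_DIRECTION leaves MXCSR reporting on).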
49914        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49915        assert_eq!(m, 0b00000101_00000101);
49916    }
49917
49918    #[simd_test(enable = "avx512f")]
49919    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49920        #[rustfmt::skip]
49921        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49922                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49923        let b = _mm512_set1_ps(-1.);
49924        let mask = 0b01100110_01100110;
49925        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49926        assert_eq!(r, 0b00000100_00000100);
49927    }
49928
49929    #[simd_test(enable = "avx512f")]
49930    unsafe fn test_mm512_cmpord_ps_mask() {
49931        #[rustfmt::skip]
49932        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49933                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49934        #[rustfmt::skip]
49935        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49936                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
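        // A lane is ordered exactly when neither operand is NAN.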
49937        let m = _mm512_cmpord_ps_mask(a, b);
49938        assert_eq!(m, 0b00000101_00000101);
49939    }
49940
49941    #[simd_test(enable = "avx512f")]
49942    unsafe fn test_mm512_mask_cmpord_ps_mask() {
49943        #[rustfmt::skip]
49944        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49945                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49946        #[rustfmt::skip]
49947        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49948                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49949        let mask = 0b11000011_11000011;
49950        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
49951        assert_eq!(m, 0b00000001_00000001);
49952    }
49953
49954    #[simd_test(enable = "avx512f")]
49955    unsafe fn test_mm512_cmpunord_ps_mask() {
49956        #[rustfmt::skip]
49957        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49958                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49959        #[rustfmt::skip]
49960        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49961                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49962        let m = _mm512_cmpunord_ps_mask(a, b);
49963
49964        assert_eq!(m, 0b11111010_11111010);
49965    }
49966
49967    #[simd_test(enable = "avx512f")]
49968    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
49969        #[rustfmt::skip]
49970        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49971                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49972        #[rustfmt::skip]
49973        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49974                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49975        let mask = 0b00001111_00001111;
49976        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
49977        assert_eq!(m, 0b00001010_00001010);
49978    }
49979
49980    #[simd_test(enable = "avx512f")]
49981    unsafe fn test_mm_cmp_ss_mask() {
49982        let a = _mm_setr_ps(2., 1., 1., 1.);
49983        let b = _mm_setr_ps(1., 2., 2., 2.);
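        // Scalar compare: only element 0 participates; the result mask is 0 or 1.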
49984        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
49985        assert_eq!(m, 1);
49986    }
49987
49988    #[simd_test(enable = "avx512f")]
49989    unsafe fn test_mm_mask_cmp_ss_mask() {
49990        let a = _mm_setr_ps(2., 1., 1., 1.);
49991        let b = _mm_setr_ps(1., 2., 2., 2.);
49992        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
49993        assert_eq!(m, 0);
49994        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
49995        assert_eq!(m, 1);
49996    }
49997
49998    #[simd_test(enable = "avx512f")]
49999    unsafe fn test_mm_cmp_round_ss_mask() {
50000        let a = _mm_setr_ps(2., 1., 1., 1.);
50001        let b = _mm_setr_ps(1., 2., 2., 2.);
50002        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50003        assert_eq!(m, 1);
50004    }
50005
50006    #[simd_test(enable = "avx512f")]
50007    unsafe fn test_mm_mask_cmp_round_ss_mask() {
50008        let a = _mm_setr_ps(2., 1., 1., 1.);
50009        let b = _mm_setr_ps(1., 2., 2., 2.);
50010        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50011        assert_eq!(m, 0);
50012        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50013        assert_eq!(m, 1);
50014    }
50015
50016    #[simd_test(enable = "avx512f")]
50017    unsafe fn test_mm_cmp_sd_mask() {
50018        let a = _mm_setr_pd(2., 1.);
50019        let b = _mm_setr_pd(1., 2.);
50020        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50021        assert_eq!(m, 1);
50022    }
50023
50024    #[simd_test(enable = "avx512f")]
50025    unsafe fn test_mm_mask_cmp_sd_mask() {
50026        let a = _mm_setr_pd(2., 1.);
50027        let b = _mm_setr_pd(1., 2.);
50028        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50029        assert_eq!(m, 0);
50030        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50031        assert_eq!(m, 1);
50032    }
50033
50034    #[simd_test(enable = "avx512f")]
50035    unsafe fn test_mm_cmp_round_sd_mask() {
50036        let a = _mm_setr_pd(2., 1.);
50037        let b = _mm_setr_pd(1., 2.);
50038        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50039        assert_eq!(m, 1);
50040    }
50041
50042    #[simd_test(enable = "avx512f")]
50043    unsafe fn test_mm_mask_cmp_round_sd_mask() {
50044        let a = _mm_setr_pd(2., 1.);
50045        let b = _mm_setr_pd(1., 2.);
50046        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50047        assert_eq!(m, 0);
50048        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50049        assert_eq!(m, 1);
50050    }
50051
50052    #[simd_test(enable = "avx512f")]
50053    unsafe fn test_mm512_cmplt_epu32_mask() {
50054        #[rustfmt::skip]
50055        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50056                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50057        let b = _mm512_set1_epi32(-1);
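        // Unsigned compare: -1 and u32::MAX as i32 are both 0xFFFFFFFF (the
        // largest u32), so only those lanes fail a < b here.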
50058        let m = _mm512_cmplt_epu32_mask(a, b);
50059        assert_eq!(m, 0b11001111_11001111);
50060    }
50061
50062    #[simd_test(enable = "avx512f")]
50063    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50064        #[rustfmt::skip]
50065        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50066                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50067        let b = _mm512_set1_epi32(-1);
50068        let mask = 0b01111010_01111010;
50069        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50070        assert_eq!(r, 0b01001010_01001010);
50071    }
50072
50073    #[simd_test(enable = "avx512f,avx512vl")]
50074    unsafe fn test_mm256_cmplt_epu32_mask() {
50075        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50076        let b = _mm256_set1_epi32(1);
50077        let r = _mm256_cmplt_epu32_mask(a, b);
50078        assert_eq!(r, 0b10000000);
50079    }
50080
50081    #[simd_test(enable = "avx512f,avx512vl")]
50082    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50083        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50084        let b = _mm256_set1_epi32(1);
50085        let mask = 0b11111111;
50086        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50087        assert_eq!(r, 0b10000000);
50088    }
50089
50090    #[simd_test(enable = "avx512f,avx512vl")]
50091    unsafe fn test_mm_cmplt_epu32_mask() {
50092        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50093        let b = _mm_set1_epi32(1);
50094        let r = _mm_cmplt_epu32_mask(a, b);
50095        assert_eq!(r, 0b00001000);
50096    }
50097
50098    #[simd_test(enable = "avx512f,avx512vl")]
50099    unsafe fn test_mm_mask_cmplt_epu32_mask() {
50100        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50101        let b = _mm_set1_epi32(1);
50102        let mask = 0b11111111;
50103        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50104        assert_eq!(r, 0b00001000);
50105    }
50106
50107    #[simd_test(enable = "avx512f")]
50108    unsafe fn test_mm512_cmpgt_epu32_mask() {
50109        #[rustfmt::skip]
50110        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50111                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50112        let b = _mm512_set1_epi32(-1);
50113        let m = _mm512_cmpgt_epu32_mask(b, a);
50114        assert_eq!(m, 0b11001111_11001111);
50115    }
50116
50117    #[simd_test(enable = "avx512f")]
50118    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50119        #[rustfmt::skip]
50120        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50121                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50122        let b = _mm512_set1_epi32(-1);
50123        let mask = 0b01111010_01111010;
50124        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50125        assert_eq!(r, 0b01001010_01001010);
50126    }
50127
50128    #[simd_test(enable = "avx512f,avx512vl")]
50129    unsafe fn test_mm256_cmpgt_epu32_mask() {
50130        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50131        let b = _mm256_set1_epi32(1);
50132        let r = _mm256_cmpgt_epu32_mask(a, b);
50133        assert_eq!(r, 0b00111111);
50134    }
50135
50136    #[simd_test(enable = "avx512f,avx512vl")]
50137    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50138        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50139        let b = _mm256_set1_epi32(1);
50140        let mask = 0b11111111;
50141        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50142        assert_eq!(r, 0b00111111);
50143    }
50144
50145    #[simd_test(enable = "avx512f,avx512vl")]
50146    unsafe fn test_mm_cmpgt_epu32_mask() {
50147        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50148        let b = _mm_set1_epi32(1);
50149        let r = _mm_cmpgt_epu32_mask(a, b);
50150        assert_eq!(r, 0b00000011);
50151    }
50152
50153    #[simd_test(enable = "avx512f,avx512vl")]
50154    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50155        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50156        let b = _mm_set1_epi32(1);
50157        let mask = 0b11111111;
50158        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50159        assert_eq!(r, 0b00000011);
50160    }
50161
50162    #[simd_test(enable = "avx512f")]
50163    unsafe fn test_mm512_cmple_epu32_mask() {
50164        #[rustfmt::skip]
50165        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50166                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50167        let b = _mm512_set1_epi32(-1);
50168        assert_eq!(
50169            _mm512_cmple_epu32_mask(a, b),
50170            !_mm512_cmpgt_epu32_mask(a, b)
50171        )
50172    }
50173
50174    #[simd_test(enable = "avx512f")]
50175    unsafe fn test_mm512_mask_cmple_epu32_mask() {
50176        #[rustfmt::skip]
50177        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50178                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50179        let b = _mm512_set1_epi32(-1);
50180        let mask = 0b01111010_01111010;
50181        assert_eq!(
50182            _mm512_mask_cmple_epu32_mask(mask, a, b),
50183            0b01111010_01111010
50184        );
50185    }
50186
50187    #[simd_test(enable = "avx512f,avx512vl")]
50188    unsafe fn test_mm256_cmple_epu32_mask() {
50189        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50190        let b = _mm256_set1_epi32(1);
50191        let r = _mm256_cmple_epu32_mask(a, b);
50192        assert_eq!(r, 0b11000000)
50193    }
50194
50195    #[simd_test(enable = "avx512f,avx512vl")]
50196    unsafe fn test_mm256_mask_cmple_epu32_mask() {
50197        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50198        let b = _mm256_set1_epi32(1);
50199        let mask = 0b11111111;
50200        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50201        assert_eq!(r, 0b11000000)
50202    }
50203
50204    #[simd_test(enable = "avx512f,avx512vl")]
50205    unsafe fn test_mm_cmple_epu32_mask() {
50206        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50207        let b = _mm_set1_epi32(1);
50208        let r = _mm_cmple_epu32_mask(a, b);
50209        assert_eq!(r, 0b00001100)
50210    }
50211
50212    #[simd_test(enable = "avx512f,avx512vl")]
50213    unsafe fn test_mm_mask_cmple_epu32_mask() {
50214        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50215        let b = _mm_set1_epi32(1);
50216        let mask = 0b11111111;
50217        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50218        assert_eq!(r, 0b00001100)
50219    }
50220
50221    #[simd_test(enable = "avx512f")]
50222    unsafe fn test_mm512_cmpge_epu32_mask() {
50223        #[rustfmt::skip]
50224        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50225                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50226        let b = _mm512_set1_epi32(-1);
50227        assert_eq!(
50228            _mm512_cmpge_epu32_mask(a, b),
50229            !_mm512_cmplt_epu32_mask(a, b)
50230        )
50231    }
50232
50233    #[simd_test(enable = "avx512f")]
50234    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50235        #[rustfmt::skip]
50236        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50237                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50238        let b = _mm512_set1_epi32(-1);
50239        let mask = 0b01111010_01111010;
50240        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50241    }
50242
50243    #[simd_test(enable = "avx512f,avx512vl")]
50244    unsafe fn test_mm256_cmpge_epu32_mask() {
50245        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50246        let b = _mm256_set1_epi32(1);
50247        let r = _mm256_cmpge_epu32_mask(a, b);
50248        assert_eq!(r, 0b01111111)
50249    }
50250
50251    #[simd_test(enable = "avx512f,avx512vl")]
50252    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50253        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50254        let b = _mm256_set1_epi32(1);
50255        let mask = 0b11111111;
50256        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50257        assert_eq!(r, 0b01111111)
50258    }
50259
50260    #[simd_test(enable = "avx512f,avx512vl")]
50261    unsafe fn test_mm_cmpge_epu32_mask() {
50262        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50263        let b = _mm_set1_epi32(1);
50264        let r = _mm_cmpge_epu32_mask(a, b);
50265        assert_eq!(r, 0b00000111)
50266    }
50267
50268    #[simd_test(enable = "avx512f,avx512vl")]
50269    unsafe fn test_mm_mask_cmpge_epu32_mask() {
50270        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50271        let b = _mm_set1_epi32(1);
50272        let mask = 0b11111111;
50273        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50274        assert_eq!(r, 0b00000111)
50275    }
50276
50277    #[simd_test(enable = "avx512f")]
50278    unsafe fn test_mm512_cmpeq_epu32_mask() {
50279        #[rustfmt::skip]
50280        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50281                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50282        #[rustfmt::skip]
50283        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50284                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50285        let m = _mm512_cmpeq_epu32_mask(b, a);
50286        assert_eq!(m, 0b11001111_11001111);
50287    }
50288
50289    #[simd_test(enable = "avx512f")]
50290    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50291        #[rustfmt::skip]
50292        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50293                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50294        #[rustfmt::skip]
50295        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50296                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50297        let mask = 0b01111010_01111010;
50298        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50299        assert_eq!(r, 0b01001010_01001010);
50300    }
50301
50302    #[simd_test(enable = "avx512f,avx512vl")]
50303    unsafe fn test_mm256_cmpeq_epu32_mask() {
50304        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50305        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50306        let m = _mm256_cmpeq_epu32_mask(b, a);
50307        assert_eq!(m, 0b11001111);
50308    }
50309
50310    #[simd_test(enable = "avx512f,avx512vl")]
50311    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50312        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50313        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50314        let mask = 0b01111010;
50315        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50316        assert_eq!(r, 0b01001010);
50317    }
50318
50319    #[simd_test(enable = "avx512f,avx512vl")]
50320    unsafe fn test_mm_cmpeq_epu32_mask() {
50321        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50322        let b = _mm_set_epi32(0, 1, 13, 42);
50323        let m = _mm_cmpeq_epu32_mask(b, a);
50324        assert_eq!(m, 0b00001100);
50325    }
50326
50327    #[simd_test(enable = "avx512f,avx512vl")]
50328    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50329        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50330        let b = _mm_set_epi32(0, 1, 13, 42);
50331        let mask = 0b11111111;
50332        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50333        assert_eq!(r, 0b00001100);
50334    }
50335
50336    #[simd_test(enable = "avx512f")]
50337    unsafe fn test_mm512_cmpneq_epu32_mask() {
50338        #[rustfmt::skip]
50339        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50340                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50341        #[rustfmt::skip]
50342        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50343                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50344        let m = _mm512_cmpneq_epu32_mask(b, a);
50345        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50346    }
50347
50348    #[simd_test(enable = "avx512f")]
50349    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50350        #[rustfmt::skip]
50351        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50352                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50353        #[rustfmt::skip]
50354        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50355                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50356        let mask = 0b01111010_01111010;
50357        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50358        assert_eq!(r, 0b00110010_00110010);
50359    }
50360
50361    #[simd_test(enable = "avx512f,avx512vl")]
50362    unsafe fn test_mm256_cmpneq_epu32_mask() {
50363        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50364        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50365        let r = _mm256_cmpneq_epu32_mask(b, a);
50366        assert_eq!(r, 0b00110000);
50367    }
50368
50369    #[simd_test(enable = "avx512f,avx512vl")]
50370    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50371        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50372        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50373        let mask = 0b11111111;
50374        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50375        assert_eq!(r, 0b00110000);
50376    }
50377
50378    #[simd_test(enable = "avx512f,avx512vl")]
50379    unsafe fn test_mm_cmpneq_epu32_mask() {
50380        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50381        let b = _mm_set_epi32(0, 1, 13, 42);
50382        let r = _mm_cmpneq_epu32_mask(b, a);
50383        assert_eq!(r, 0b00000011);
50384    }
50385
50386    #[simd_test(enable = "avx512f,avx512vl")]
50387    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50388        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50389        let b = _mm_set_epi32(0, 1, 13, 42);
50390        let mask = 0b11111111;
50391        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50392        assert_eq!(r, 0b00000011);
50393    }
50394
50395    #[simd_test(enable = "avx512f")]
50396    unsafe fn test_mm512_cmp_epu32_mask() {
50397        #[rustfmt::skip]
50398        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50399                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50400        let b = _mm512_set1_epi32(-1);
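        // _MM_CMPINT_LT performs an unsigned less-than for epu32; b is all
        // ones (u32::MAX), so every lane except the 0xFFFFFFFF ones is less.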
50401        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50402        assert_eq!(m, 0b11001111_11001111);
50403    }
50404
50405    #[simd_test(enable = "avx512f")]
50406    unsafe fn test_mm512_mask_cmp_epu32_mask() {
50407        #[rustfmt::skip]
50408        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50409                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50410        let b = _mm512_set1_epi32(-1);
50411        let mask = 0b01111010_01111010;
50412        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50413        assert_eq!(r, 0b01001010_01001010);
50414    }
50415
50416    #[simd_test(enable = "avx512f,avx512vl")]
50417    unsafe fn test_mm256_cmp_epu32_mask() {
50418        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50419        let b = _mm256_set1_epi32(-1);
50420        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50421        assert_eq!(m, 0b11001111);
50422    }
50423
50424    #[simd_test(enable = "avx512f,avx512vl")]
50425    unsafe fn test_mm256_mask_cmp_epu32_mask() {
50426        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50427        let b = _mm256_set1_epi32(-1);
50428        let mask = 0b11111111;
50429        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50430        assert_eq!(r, 0b11001111);
50431    }
50432
50433    #[simd_test(enable = "avx512f,avx512vl")]
50434    unsafe fn test_mm_cmp_epu32_mask() {
50435        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50436        let b = _mm_set1_epi32(1);
50437        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50438        assert_eq!(m, 0b00001000);
50439    }
50440
50441    #[simd_test(enable = "avx512f,avx512vl")]
50442    unsafe fn test_mm_mask_cmp_epu32_mask() {
50443        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50444        let b = _mm_set1_epi32(1);
50445        let mask = 0b11111111;
50446        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50447        assert_eq!(r, 0b00001000);
50448    }
50449
50450    #[simd_test(enable = "avx512f")]
50451    unsafe fn test_mm512_cmplt_epi32_mask() {
50452        #[rustfmt::skip]
50453        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50454                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50455        let b = _mm512_set1_epi32(-1);
50456        let m = _mm512_cmplt_epi32_mask(a, b);
50457        assert_eq!(m, 0b00000101_00000101);
50458    }
50459
50460    #[simd_test(enable = "avx512f")]
50461    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50462        #[rustfmt::skip]
50463        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50464                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50465        let b = _mm512_set1_epi32(-1);
50466        let mask = 0b01100110_01100110;
50467        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50468        assert_eq!(r, 0b00000100_00000100);
50469    }
50470
50471    #[simd_test(enable = "avx512f,avx512vl")]
50472    unsafe fn test_mm256_cmplt_epi32_mask() {
50473        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50474        let b = _mm256_set1_epi32(-1);
50475        let r = _mm256_cmplt_epi32_mask(a, b);
50476        assert_eq!(r, 0b00000101);
50477    }
50478
50479    #[simd_test(enable = "avx512f,avx512vl")]
50480    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50481        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50482        let b = _mm256_set1_epi32(-1);
50483        let mask = 0b11111111;
50484        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50485        assert_eq!(r, 0b00000101);
50486    }
50487
50488    #[simd_test(enable = "avx512f,avx512vl")]
50489    unsafe fn test_mm_cmplt_epi32_mask() {
50490        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50491        let b = _mm_set1_epi32(-1);
50492        let r = _mm_cmplt_epi32_mask(a, b);
50493        assert_eq!(r, 0b00000101);
50494    }
50495
50496    #[simd_test(enable = "avx512f,avx512vl")]
50497    unsafe fn test_mm_mask_cmplt_epi32_mask() {
50498        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50499        let b = _mm_set1_epi32(-1);
50500        let mask = 0b11111111;
50501        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50502        assert_eq!(r, 0b00000101);
50503    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epi32_mask(b, a);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b00001101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001101);
    }
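
    // The 512-bit `cmpgt` tests above pass the operands swapped, exploiting
    // that `b > a` holds exactly when `a < b`. A hedged sketch of that duality
    // (illustrative test, not from the upstream suite):
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpgt_cmplt_duality() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_set1_epi32(7);
        assert_eq!(_mm512_cmpgt_epi32_mask(b, a), _mm512_cmplt_epi32_mask(a, b));
    }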

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmple_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epi32_mask(a, b),
            !_mm512_cmpgt_epi32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmple_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00000010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000010)
    }
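
    // A masked compare is compare-then-AND: a cleared write-mask bit forces a
    // zero result bit. A hedged sketch of that equivalence (illustrative test,
    // not from the upstream suite):
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmple_epi32_mask_is_and() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_set1_epi32(7);
        let mask = 0b10101010_10101010;
        assert_eq!(
            _mm512_mask_cmple_epi32_mask(mask, a, b),
            mask & _mm512_cmple_epi32_mask(a, b)
        );
    }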

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epi32_mask(a, b),
            !_mm512_cmplt_epi32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmpge_epi32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b00001111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001111)
    }
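
    // `u32::MAX as i32` wraps to -1, so those lanes equal the all-(-1) `b`;
    // they therefore appear in the `cmpge`/`cmple` masks but not in the strict
    // `cmpgt`/`cmplt` ones. A hedged sketch (illustrative test, not from the
    // upstream suite):
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpge_vs_cmpgt_on_equal_lanes() {
        let a = _mm_set1_epi32(u32::MAX as i32); // same bit pattern as -1
        let b = _mm_set1_epi32(-1);
        assert_eq!(_mm_cmpge_epi32_mask(a, b), 0b00001111);
        assert_eq!(_mm_cmpgt_epi32_mask(a, b), 0b00000000);
    }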

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epi32_mask(b, a);
        assert_eq!(r, 0b00000011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000011)
    }
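
    // For vectors narrower than the mask type, `cmpneq` is *not* the plain
    // bitwise complement of `cmpeq`: the unused high mask bits stay zero.
    // A hedged sketch of the distinction on 128-bit vectors (illustrative
    // test, not from the upstream suite):
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmpneq_is_not_plain_not() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        assert_eq!(_mm_cmpneq_epi32_mask(b, a), 0b00000011);
        // Plain `!` would also flip the four unused high bits.
        assert_eq!(!_mm_cmpeq_epi32_mask(b, a) & 0b00001111, 0b00000011);
    }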

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b01100110;
        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }

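    // Editorial aside: `_mm512_cmp_epi32_mask::<IMM>` generalizes the dedicated
    // comparison intrinsics; with `_MM_CMPINT_LT` it should agree with
    // `_mm512_cmplt_epi32_mask`. A hedged sketch of that equivalence
    // (illustrative test, not from the upstream suite):
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmp_epi32_mask_lt_matches_cmplt() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_set1_epi32(7);
        assert_eq!(
            _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b),
            _mm512_cmplt_epi32_mask(a, b)
        );
    }

    // The `set*`/`setr*` constructors below differ only in argument order:
    // `set` lists lanes from the highest index down to 0, while `setr`
    // ("reversed") lists them from 0 up.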
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set_epi8() {
        let r = _mm512_set1_epi8(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi8(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2,
            ),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set_epi16() {
        let r = _mm512_set1_epi16(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi16(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2,
            ),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set_epi32() {
        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setr_epi32() {
        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set1_epi8() {
        let r = _mm512_set_epi8(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi8(2));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set1_epi16() {
        let r = _mm512_set_epi16(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi16(2));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set1_epi32() {
        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, _mm512_set1_epi32(2));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setzero_si512() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setzero_epi32() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set_ps() {
        let r = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_set_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setr_ps() {
        let r = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set1_ps() {
        #[rustfmt::skip]
        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
                                     2., 2., 2., 2., 2., 2., 2., 2.);
        assert_eq_m512(expected, _mm512_set1_ps(2.));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_set4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setr4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setr4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setzero_ps() {
        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_setzero() {
        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
    }

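    // The `loadu`/`storeu` tests below work on arbitrarily aligned slices,
    // while the aligned `load`/`store` variants require vector-sized
    // alignment, provided here by `#[repr(align(64))]` wrapper structs
    // (align(32)/align(16) for the narrower vectors further down).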
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_pd() {
        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
        let p = a.as_ptr();
        let r = _mm512_loadu_pd(black_box(p));
        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_pd() {
        let a = _mm512_set1_pd(9.);
        let mut r = _mm512_undefined_pd();
        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
        assert_eq_m512d(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_ps() {
        let a = &[
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        ];
        let p = a.as_ptr();
        let r = _mm512_loadu_ps(black_box(p));
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
        assert_eq_m512(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_loadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let src = _mm512_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_load_epi32(src, m, black_box(p));
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_load_epi32(m, black_box(p));
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_storeu_epi32() {
        let mut r = [42_i32; 16];
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_store_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16],
        }
        let mut r = Align { data: [42; 16] };
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
    }
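
    // A masked store writes only the selected lanes and leaves the rest of the
    // buffer untouched. A minimal hedged sketch of that merge behavior
    // (hypothetical test, not from the upstream suite):
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_storeu_epi32_merges() {
        let mut buf = [0_i32; 16];
        let a = _mm512_set1_epi32(7);
        // Only mask bits 0 and 1 are set, so only buf[0] and buf[1] change.
        _mm512_mask_storeu_epi32(buf.as_mut_ptr(), 0b00000000_00000011, a);
        assert_eq!(buf[..3], [7, 7, 0]);
    }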

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_loadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let src = _mm512_set1_epi64(42);
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm512_mask_load_epi64(src, m, black_box(p));
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm512_maskz_load_epi64(m, black_box(p));
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_storeu_epi64() {
        let mut r = [42_i64; 8];
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_store_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8],
        }
        let mut r = Align { data: [42; 8] };
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        let p = r.data.as_mut_ptr();
        _mm512_mask_store_epi64(p, m, a);
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_loadu_ps() {
        let src = _mm512_set1_ps(42.0);
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_loadu_ps() {
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_loadu_ps(m, black_box(p));
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let src = _mm512_set1_ps(42.0);
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_load_ps(src, m, black_box(p));
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_load_ps(m, black_box(p));
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_storeu_ps() {
        let mut r = [42_f32; 16];
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_store_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16],
        }
        let mut r = Align { data: [42.0; 16] };
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_loadu_pd() {
        let src = _mm512_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm512_maskz_loadu_pd(m, black_box(p));
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let src = _mm512_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm512_mask_load_pd(src, m, black_box(p));
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm512_maskz_load_pd(m, black_box(p));
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_storeu_pd() {
        let mut r = [42_f64; 8];
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_store_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let src = _mm256_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm256_mask_load_epi32(src, m, black_box(p));
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm256_maskz_load_epi32(m, black_box(p));
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi32() {
        let mut r = [42_i32; 8];
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_store_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let mut r = Align { data: [42; 8] };
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
    }

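    // Naming quirk inherited from SSE/AVX: the 128- and 256-bit 64-bit integer
    // constructors carry an `x` suffix (`_mm256_set1_epi64x`, `_mm_setr_epi64x`),
    // while the 512-bit ones do not (`_mm512_set1_epi64`).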
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let src = _mm256_set1_epi64x(42);
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm256_mask_load_epi64(src, m, black_box(p));
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm256_maskz_load_epi64(m, black_box(p));
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi64() {
        let mut r = [42_i64; 4];
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_store_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4],
        }
        let mut r = Align { data: [42; 4] };
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_loadu_ps() {
        let src = _mm256_set1_ps(42.0);
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_ps() {
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm256_maskz_loadu_ps(m, black_box(p));
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let src = _mm256_set1_ps(42.0);
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm256_mask_load_ps(src, m, black_box(p));
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = _mm256_maskz_load_ps(m, black_box(p));
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_storeu_ps() {
        let mut r = [42_f32; 8];
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_store_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_loadu_pd() {
        let src = _mm256_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm256_maskz_loadu_pd(m, black_box(p));
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let src = _mm256_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm256_mask_load_pd(src, m, black_box(p));
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm256_maskz_load_pd(m, black_box(p));
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_storeu_pd() {
        let mut r = [42_f64; 4];
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_store_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4],
        }
        let mut r = Align { data: [42.0; 4] };
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_loadu_epi32(m, black_box(p));
        let e = _mm_setr_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_load_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let src = _mm_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_load_epi32(src, m, black_box(p));
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_load_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_load_epi32(m, black_box(p));
        let e = _mm_setr_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi32() {
        let mut r = [42_i32; 4];
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let m = 0b1010;
        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_store_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let mut r = Align { data: [42; 4] };
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let m = 0b1010;
        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_loadu_epi64(m, black_box(p));
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_load_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let src = _mm_set1_epi64x(42);
        let a = Align { data: [1_i64, 2] };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_mask_load_epi64(src, m, black_box(p));
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_load_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let a = Align { data: [1_i64, 2] };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_load_epi64(m, black_box(p));
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi64() {
        let mut r = [42_i64; 2];
        let a = _mm_setr_epi64x(1, 2);
        let m = 0b10;
        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_store_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let mut r = Align { data: [42; 2] };
        let a = _mm_setr_epi64x(1, 2);
        let m = 0b10;
        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_loadu_ps() {
        let src = _mm_set1_ps(42.0);
        let a = &[1.0_f32, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_loadu_ps(src, m, black_box(p));
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_loadu_ps() {
        let a = &[1.0_f32, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_loadu_ps(m, black_box(p));
        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_load_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let src = _mm_set1_ps(42.0);
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_load_ps(src, m, black_box(p));
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_load_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_load_ps(m, black_box(p));
        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_storeu_ps() {
        let mut r = [42_f32; 4];
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_store_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let mut r = Align { data: [42.0; 4] };
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_loadu_pd() {
        let src = _mm_set1_pd(42.0);
        let a = &[1.0_f64, 2.0];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_mask_loadu_pd(src, m, black_box(p));
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_loadu_pd(m, black_box(p));
        let e = _mm_setr_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_load_pd() {
        #[repr(align(16))]
        struct Align {
            data: [f64; 2], // 16 bytes
        }
        let src = _mm_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0],
        };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_mask_load_pd(src, m, black_box(p));
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_load_pd() {
        #[repr(align(16))]
        struct Align {
            data: [f64; 2], // 16 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0],
        };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_load_pd(m, black_box(p));
        let e = _mm_setr_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

51977    #[simd_test(enable = "avx512f")]
51978    unsafe fn test_mm_mask_load_ss() {
51979        #[repr(align(16))]
51980        struct Align {
51981            data: f32,
51982        }
51983        let src = _mm_set_ss(2.0);
51984        let mem = Align { data: 1.0 };
51985        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
51986        assert_eq_m128(r, _mm_set_ss(1.0));
51987        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
51988        assert_eq_m128(r, _mm_set_ss(2.0));
51989    }
51990
51991    #[simd_test(enable = "avx512f")]
51992    unsafe fn test_mm_maskz_load_ss() {
51993        #[repr(align(16))]
51994        struct Align {
51995            data: f32,
51996        }
51997        let mem = Align { data: 1.0 };
51998        let r = _mm_maskz_load_ss(0b1, &mem.data);
51999        assert_eq_m128(r, _mm_set_ss(1.0));
52000        let r = _mm_maskz_load_ss(0b0, &mem.data);
52001        assert_eq_m128(r, _mm_set_ss(0.0));
52002    }
52003
52004    #[simd_test(enable = "avx512f")]
52005    unsafe fn test_mm_mask_load_sd() {
52006        #[repr(align(16))]
52007        struct Align {
52008            data: f64,
52009        }
52010        let src = _mm_set_sd(2.0);
52011        let mem = Align { data: 1.0 };
52012        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52013        assert_eq_m128d(r, _mm_set_sd(1.0));
52014        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52015        assert_eq_m128d(r, _mm_set_sd(2.0));
52016    }
52017
52018    #[simd_test(enable = "avx512f")]
52019    unsafe fn test_mm_maskz_load_sd() {
52020        #[repr(align(16))]
52021        struct Align {
52022            data: f64,
52023        }
52024        let mem = Align { data: 1.0 };
52025        let r = _mm_maskz_load_sd(0b1, &mem.data);
52026        assert_eq_m128d(r, _mm_set_sd(1.0));
52027        let r = _mm_maskz_load_sd(0b0, &mem.data);
52028        assert_eq_m128d(r, _mm_set_sd(0.0));
52029    }
52030
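    // The scalar `_ss`/`_sd` masked loads above consult only bit 0 of the
    // mask: element 0 comes from memory when the bit is set and from `src`
    // otherwise, and (per Intel's pseudocode) the upper elements of the result
    // are zeroed either way, which is why comparing against `_mm_set_ss` /
    // `_mm_set_sd` values works.
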
52031    #[simd_test(enable = "avx512f,avx512vl")]
52032    unsafe fn test_mm_mask_storeu_pd() {
52033        let mut r = [42_f64; 2];
52034        let a = _mm_setr_pd(1.0, 2.0);
52035        let m = 0b10;
52036        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52037        let e = _mm_setr_pd(42.0, 2.0);
52038        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52039    }
52040
52041    #[simd_test(enable = "avx512f,avx512vl")]
52042    unsafe fn test_mm_mask_store_pd() {
52043        #[repr(align(16))]
52044        struct Align {
52045            data: [f64; 2], // 16 bytes
52046        }
52047        let mut r = Align { data: [42.0; 2] };
52048        let a = _mm_setr_pd(1.0, 2.0);
52049        let m = 0b10;
52050        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52051        let e = _mm_setr_pd(42.0, 2.0);
52052        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52053    }
52054
52055    #[simd_test(enable = "avx512f")]
52056    unsafe fn test_mm_mask_store_ss() {
52057        #[repr(align(16))]
52058        struct Align {
52059            data: f32,
52060        }
52061        let a = _mm_set_ss(2.0);
52062        let mut mem = Align { data: 1.0 };
52063        _mm_mask_store_ss(&mut mem.data, 0b1, a);
52064        assert_eq!(mem.data, 2.0);
52065        _mm_mask_store_ss(&mut mem.data, 0b0, a);
52066        assert_eq!(mem.data, 2.0);
52067    }
52068
52069    #[simd_test(enable = "avx512f")]
52070    unsafe fn test_mm_mask_store_sd() {
52071        #[repr(align(16))]
52072        struct Align {
52073            data: f64,
52074        }
52075        let a = _mm_set_sd(2.0);
52076        let mut mem = Align { data: 1.0 };
52077        _mm_mask_store_sd(&mut mem.data, 0b1, a);
52078        assert_eq!(mem.data, 2.0);
52079        _mm_mask_store_sd(&mut mem.data, 0b0, a);
52080        assert_eq!(mem.data, 2.0);
52081    }
52082
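    // The scalar masked stores write the low element only when bit 0 of the
    // mask is set; with a zero mask, memory keeps its previous value, which is
    // why `mem.data` stays 2.0 after the second store in each test above.
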
52083    #[simd_test(enable = "avx512f")]
52084    unsafe fn test_mm512_setr_pd() {
52085        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52086        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52087    }
52088
52089    #[simd_test(enable = "avx512f")]
52090    unsafe fn test_mm512_set_pd() {
52091        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52092        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52093    }
52094
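    // These two tests pin down the argument-order convention used throughout
    // this module: `set` lists elements from the highest index down to element
    // 0, while `setr` ("reversed") lists them from element 0 upward.
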
52095    #[simd_test(enable = "avx512f")]
52096    unsafe fn test_mm512_rol_epi32() {
52097        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52098        let r = _mm512_rol_epi32::<1>(a);
52099        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52100        assert_eq_m512i(r, e);
52101    }
52102
52103    #[simd_test(enable = "avx512f")]
52104    unsafe fn test_mm512_mask_rol_epi32() {
52105        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52106        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52107        assert_eq_m512i(r, a);
52108        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52109        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52110        assert_eq_m512i(r, e);
52111    }
52112
52113    #[simd_test(enable = "avx512f")]
52114    unsafe fn test_mm512_maskz_rol_epi32() {
52115        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52116        let r = _mm512_maskz_rol_epi32::<1>(0, a);
52117        assert_eq_m512i(r, _mm512_setzero_si512());
52118        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52119        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52120        assert_eq_m512i(r, e);
52121    }
52122
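    // `rol_epi32` rotates each 32-bit element left: bits shifted out of bit 31
    // re-enter at bit 0, so `1 << 31` rotated by 1 becomes `1 << 0`, and the
    // count is reduced modulo 32. Per element this matches `u32::rotate_left`;
    // `ror_epi32` further below is the `rotate_right` mirror image. A scalar
    // sketch of one element (illustrative only, not a stdarch API):
    #[allow(dead_code)]
    fn rol32_ref(x: u32, n: u32) -> u32 {
        // `rotate_left` wraps the count, matching the vprold semantics.
        x.rotate_left(n)
    }
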
52123    #[simd_test(enable = "avx512f,avx512vl")]
52124    unsafe fn test_mm256_rol_epi32() {
52125        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52126        let r = _mm256_rol_epi32::<1>(a);
52127        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52128        assert_eq_m256i(r, e);
52129    }
52130
52131    #[simd_test(enable = "avx512f,avx512vl")]
52132    unsafe fn test_mm256_mask_rol_epi32() {
52133        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52134        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52135        assert_eq_m256i(r, a);
52136        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52137        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52138        assert_eq_m256i(r, e);
52139    }
52140
52141    #[simd_test(enable = "avx512f,avx512vl")]
52142    unsafe fn test_mm256_maskz_rol_epi32() {
52143        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52144        let r = _mm256_maskz_rol_epi32::<1>(0, a);
52145        assert_eq_m256i(r, _mm256_setzero_si256());
52146        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52147        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52148        assert_eq_m256i(r, e);
52149    }
52150
52151    #[simd_test(enable = "avx512f,avx512vl")]
52152    unsafe fn test_mm_rol_epi32() {
52153        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52154        let r = _mm_rol_epi32::<1>(a);
52155        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52156        assert_eq_m128i(r, e);
52157    }
52158
52159    #[simd_test(enable = "avx512f,avx512vl")]
52160    unsafe fn test_mm_mask_rol_epi32() {
52161        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52162        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52163        assert_eq_m128i(r, a);
52164        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52165        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52166        assert_eq_m128i(r, e);
52167    }
52168
52169    #[simd_test(enable = "avx512f,avx512vl")]
52170    unsafe fn test_mm_maskz_rol_epi32() {
52171        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52172        let r = _mm_maskz_rol_epi32::<1>(0, a);
52173        assert_eq_m128i(r, _mm_setzero_si128());
52174        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52175        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52176        assert_eq_m128i(r, e);
52177    }
52178
52179    #[simd_test(enable = "avx512f")]
52180    unsafe fn test_mm512_ror_epi32() {
52181        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52182        let r = _mm512_ror_epi32::<1>(a);
52183        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52184        assert_eq_m512i(r, e);
52185    }
52186
52187    #[simd_test(enable = "avx512f")]
52188    unsafe fn test_mm512_mask_ror_epi32() {
52189        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52190        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52191        assert_eq_m512i(r, a);
52192        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52193        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52194        assert_eq_m512i(r, e);
52195    }
52196
52197    #[simd_test(enable = "avx512f")]
52198    unsafe fn test_mm512_maskz_ror_epi32() {
52199        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52200        let r = _mm512_maskz_ror_epi32::<1>(0, a);
52201        assert_eq_m512i(r, _mm512_setzero_si512());
52202        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52203        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52204        assert_eq_m512i(r, e);
52205    }
52206
52207    #[simd_test(enable = "avx512f,avx512vl")]
52208    unsafe fn test_mm256_ror_epi32() {
52209        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52210        let r = _mm256_ror_epi32::<1>(a);
52211        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52212        assert_eq_m256i(r, e);
52213    }
52214
52215    #[simd_test(enable = "avx512f,avx512vl")]
52216    unsafe fn test_mm256_mask_ror_epi32() {
52217        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52218        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52219        assert_eq_m256i(r, a);
52220        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52221        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52222        assert_eq_m256i(r, e);
52223    }
52224
52225    #[simd_test(enable = "avx512f,avx512vl")]
52226    unsafe fn test_mm256_maskz_ror_epi32() {
52227        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52228        let r = _mm256_maskz_ror_epi32::<1>(0, a);
52229        assert_eq_m256i(r, _mm256_setzero_si256());
52230        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52231        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52232        assert_eq_m256i(r, e);
52233    }
52234
52235    #[simd_test(enable = "avx512f,avx512vl")]
52236    unsafe fn test_mm_ror_epi32() {
52237        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52238        let r = _mm_ror_epi32::<1>(a);
52239        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52240        assert_eq_m128i(r, e);
52241    }
52242
52243    #[simd_test(enable = "avx512f,avx512vl")]
52244    unsafe fn test_mm_mask_ror_epi32() {
52245        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52246        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52247        assert_eq_m128i(r, a);
52248        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52249        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52250        assert_eq_m128i(r, e);
52251    }
52252
52253    #[simd_test(enable = "avx512f,avx512vl")]
52254    unsafe fn test_mm_maskz_ror_epi32() {
52255        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52256        let r = _mm_maskz_ror_epi32::<1>(0, a);
52257        assert_eq_m128i(r, _mm_setzero_si128());
52258        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52259        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52260        assert_eq_m128i(r, e);
52261    }
52262
52263    #[simd_test(enable = "avx512f")]
52264    unsafe fn test_mm512_slli_epi32() {
52265        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52266        let r = _mm512_slli_epi32::<1>(a);
52267        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52268        assert_eq_m512i(r, e);
52269    }
52270
52271    #[simd_test(enable = "avx512f")]
52272    unsafe fn test_mm512_mask_slli_epi32() {
52273        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52274        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52275        assert_eq_m512i(r, a);
52276        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52277        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52278        assert_eq_m512i(r, e);
52279    }
52280
52281    #[simd_test(enable = "avx512f")]
52282    unsafe fn test_mm512_maskz_slli_epi32() {
52283        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52284        let r = _mm512_maskz_slli_epi32::<1>(0, a);
52285        assert_eq_m512i(r, _mm512_setzero_si512());
52286        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52287        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52288        assert_eq_m512i(r, e);
52289    }
52290
52291    #[simd_test(enable = "avx512f,avx512vl")]
52292    unsafe fn test_mm256_mask_slli_epi32() {
52293        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52294        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52295        assert_eq_m256i(r, a);
52296        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52297        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52298        assert_eq_m256i(r, e);
52299    }
52300
52301    #[simd_test(enable = "avx512f,avx512vl")]
52302    unsafe fn test_mm256_maskz_slli_epi32() {
52303        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52304        let r = _mm256_maskz_slli_epi32::<1>(0, a);
52305        assert_eq_m256i(r, _mm256_setzero_si256());
52306        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52307        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52308        assert_eq_m256i(r, e);
52309    }
52310
52311    #[simd_test(enable = "avx512f,avx512vl")]
52312    unsafe fn test_mm_mask_slli_epi32() {
52313        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52314        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52315        assert_eq_m128i(r, a);
52316        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52317        let e = _mm_set_epi32(0, 2, 2, 2);
52318        assert_eq_m128i(r, e);
52319    }
52320
52321    #[simd_test(enable = "avx512f,avx512vl")]
52322    unsafe fn test_mm_maskz_slli_epi32() {
52323        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52324        let r = _mm_maskz_slli_epi32::<1>(0, a);
52325        assert_eq_m128i(r, _mm_setzero_si128());
52326        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52327        let e = _mm_set_epi32(0, 2, 2, 2);
52328        assert_eq_m128i(r, e);
52329    }
52330
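    // Unlike the rotates above, `slli_epi32` is a plain logical shift: bits
    // pushed past bit 31 are discarded, so `1 << 31` becomes 0 instead of
    // wrapping around, and an immediate count greater than 31 zeroes the
    // element entirely.
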
52331    #[simd_test(enable = "avx512f")]
52332    unsafe fn test_mm512_srli_epi32() {
52333        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52334        let r = _mm512_srli_epi32::<1>(a);
52335        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52336        assert_eq_m512i(r, e);
52337    }
52338
52339    #[simd_test(enable = "avx512f")]
52340    unsafe fn test_mm512_mask_srli_epi32() {
52341        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52342        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52343        assert_eq_m512i(r, a);
52344        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52345        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52346        assert_eq_m512i(r, e);
52347    }
52348
52349    #[simd_test(enable = "avx512f")]
52350    unsafe fn test_mm512_maskz_srli_epi32() {
52351        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52352        let r = _mm512_maskz_srli_epi32::<1>(0, a);
52353        assert_eq_m512i(r, _mm512_setzero_si512());
52354        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52355        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52356        assert_eq_m512i(r, e);
52357    }
52358
52359    #[simd_test(enable = "avx512f,avx512vl")]
52360    unsafe fn test_mm256_mask_srli_epi32() {
52361        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52362        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52363        assert_eq_m256i(r, a);
52364        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52365        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52366        assert_eq_m256i(r, e);
52367    }
52368
52369    #[simd_test(enable = "avx512f,avx512vl")]
52370    unsafe fn test_mm256_maskz_srli_epi32() {
52371        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52372        let r = _mm256_maskz_srli_epi32::<1>(0, a);
52373        assert_eq_m256i(r, _mm256_setzero_si256());
52374        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52375        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52376        assert_eq_m256i(r, e);
52377    }
52378
52379    #[simd_test(enable = "avx512f,avx512vl")]
52380    unsafe fn test_mm_mask_srli_epi32() {
52381        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52382        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52383        assert_eq_m128i(r, a);
52384        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52385        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52386        assert_eq_m128i(r, e);
52387    }
52388
52389    #[simd_test(enable = "avx512f,avx512vl")]
52390    unsafe fn test_mm_maskz_srli_epi32() {
52391        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52392        let r = _mm_maskz_srli_epi32::<1>(0, a);
52393        assert_eq_m128i(r, _mm_setzero_si128());
52394        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52395        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52396        assert_eq_m128i(r, e);
52397    }
52398
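    // `srli_epi32` is the zero-filling (logical) right shift; its
    // sign-propagating counterpart `srai_epi32` is exercised further below,
    // where negative inputs make the difference visible.
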
52399    #[simd_test(enable = "avx512f")]
52400    unsafe fn test_mm512_rolv_epi32() {
52401        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52402        let b = _mm512_set1_epi32(1);
52403        let r = _mm512_rolv_epi32(a, b);
52404        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52405        assert_eq_m512i(r, e);
52406    }
52407
52408    #[simd_test(enable = "avx512f")]
52409    unsafe fn test_mm512_mask_rolv_epi32() {
52410        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52411        let b = _mm512_set1_epi32(1);
52412        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52413        assert_eq_m512i(r, a);
52414        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52415        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52416        assert_eq_m512i(r, e);
52417    }
52418
52419    #[simd_test(enable = "avx512f")]
52420    unsafe fn test_mm512_maskz_rolv_epi32() {
52421        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52422        let b = _mm512_set1_epi32(1);
52423        let r = _mm512_maskz_rolv_epi32(0, a, b);
52424        assert_eq_m512i(r, _mm512_setzero_si512());
52425        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52426        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52427        assert_eq_m512i(r, e);
52428    }
52429
52430    #[simd_test(enable = "avx512f,avx512vl")]
52431    unsafe fn test_mm256_rolv_epi32() {
52432        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52433        let b = _mm256_set1_epi32(1);
52434        let r = _mm256_rolv_epi32(a, b);
52435        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52436        assert_eq_m256i(r, e);
52437    }
52438
52439    #[simd_test(enable = "avx512f,avx512vl")]
52440    unsafe fn test_mm256_mask_rolv_epi32() {
52441        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52442        let b = _mm256_set1_epi32(1);
52443        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52444        assert_eq_m256i(r, a);
52445        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52446        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52447        assert_eq_m256i(r, e);
52448    }
52449
52450    #[simd_test(enable = "avx512f,avx512vl")]
52451    unsafe fn test_mm256_maskz_rolv_epi32() {
52452        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52453        let b = _mm256_set1_epi32(1);
52454        let r = _mm256_maskz_rolv_epi32(0, a, b);
52455        assert_eq_m256i(r, _mm256_setzero_si256());
52456        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52457        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52458        assert_eq_m256i(r, e);
52459    }
52460
52461    #[simd_test(enable = "avx512f,avx512vl")]
52462    unsafe fn test_mm_rolv_epi32() {
52463        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52464        let b = _mm_set1_epi32(1);
52465        let r = _mm_rolv_epi32(a, b);
52466        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52467        assert_eq_m128i(r, e);
52468    }
52469
52470    #[simd_test(enable = "avx512f,avx512vl")]
52471    unsafe fn test_mm_mask_rolv_epi32() {
52472        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52473        let b = _mm_set1_epi32(1);
52474        let r = _mm_mask_rolv_epi32(a, 0, a, b);
52475        assert_eq_m128i(r, a);
52476        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52477        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52478        assert_eq_m128i(r, e);
52479    }
52480
52481    #[simd_test(enable = "avx512f,avx512vl")]
52482    unsafe fn test_mm_maskz_rolv_epi32() {
52483        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52484        let b = _mm_set1_epi32(1);
52485        let r = _mm_maskz_rolv_epi32(0, a, b);
52486        assert_eq_m128i(r, _mm_setzero_si128());
52487        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52488        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52489        assert_eq_m128i(r, e);
52490    }
52491
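    // The `rolv`/`rorv` forms take a per-element rotate count from the
    // corresponding element of `b` rather than a single immediate; with
    // `b = set1(1)` they reduce to the `rol::<1>`/`ror::<1>` cases above.
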
52492    #[simd_test(enable = "avx512f")]
52493    unsafe fn test_mm512_rorv_epi32() {
52494        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52495        let b = _mm512_set1_epi32(1);
52496        let r = _mm512_rorv_epi32(a, b);
52497        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52498        assert_eq_m512i(r, e);
52499    }
52500
52501    #[simd_test(enable = "avx512f")]
52502    unsafe fn test_mm512_mask_rorv_epi32() {
52503        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52504        let b = _mm512_set1_epi32(1);
52505        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52506        assert_eq_m512i(r, a);
52507        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52508        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52509        assert_eq_m512i(r, e);
52510    }
52511
52512    #[simd_test(enable = "avx512f")]
52513    unsafe fn test_mm512_maskz_rorv_epi32() {
52514        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52515        let b = _mm512_set1_epi32(1);
52516        let r = _mm512_maskz_rorv_epi32(0, a, b);
52517        assert_eq_m512i(r, _mm512_setzero_si512());
52518        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52519        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52520        assert_eq_m512i(r, e);
52521    }
52522
52523    #[simd_test(enable = "avx512f,avx512vl")]
52524    unsafe fn test_mm256_rorv_epi32() {
52525        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52526        let b = _mm256_set1_epi32(1);
52527        let r = _mm256_rorv_epi32(a, b);
52528        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52529        assert_eq_m256i(r, e);
52530    }
52531
52532    #[simd_test(enable = "avx512f,avx512vl")]
52533    unsafe fn test_mm256_mask_rorv_epi32() {
52534        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52535        let b = _mm256_set1_epi32(1);
52536        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52537        assert_eq_m256i(r, a);
52538        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52539        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52540        assert_eq_m256i(r, e);
52541    }
52542
52543    #[simd_test(enable = "avx512f,avx512vl")]
52544    unsafe fn test_mm256_maskz_rorv_epi32() {
52545        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52546        let b = _mm256_set1_epi32(1);
52547        let r = _mm256_maskz_rorv_epi32(0, a, b);
52548        assert_eq_m256i(r, _mm256_setzero_si256());
52549        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52550        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52551        assert_eq_m256i(r, e);
52552    }
52553
52554    #[simd_test(enable = "avx512f,avx512vl")]
52555    unsafe fn test_mm_rorv_epi32() {
52556        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52557        let b = _mm_set1_epi32(1);
52558        let r = _mm_rorv_epi32(a, b);
52559        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52560        assert_eq_m128i(r, e);
52561    }
52562
52563    #[simd_test(enable = "avx512f,avx512vl")]
52564    unsafe fn test_mm_mask_rorv_epi32() {
52565        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52566        let b = _mm_set1_epi32(1);
52567        let r = _mm_mask_rorv_epi32(a, 0, a, b);
52568        assert_eq_m128i(r, a);
52569        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52570        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52571        assert_eq_m128i(r, e);
52572    }
52573
52574    #[simd_test(enable = "avx512f,avx512vl")]
52575    unsafe fn test_mm_maskz_rorv_epi32() {
52576        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52577        let b = _mm_set1_epi32(1);
52578        let r = _mm_maskz_rorv_epi32(0, a, b);
52579        assert_eq_m128i(r, _mm_setzero_si128());
52580        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52581        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52582        assert_eq_m128i(r, e);
52583    }
52584
52585    #[simd_test(enable = "avx512f")]
52586    unsafe fn test_mm512_sllv_epi32() {
52587        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52588        let count = _mm512_set1_epi32(1);
52589        let r = _mm512_sllv_epi32(a, count);
52590        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52591        assert_eq_m512i(r, e);
52592    }
52593
52594    #[simd_test(enable = "avx512f")]
52595    unsafe fn test_mm512_mask_sllv_epi32() {
52596        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52597        let count = _mm512_set1_epi32(1);
52598        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52599        assert_eq_m512i(r, a);
52600        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52601        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52602        assert_eq_m512i(r, e);
52603    }
52604
52605    #[simd_test(enable = "avx512f")]
52606    unsafe fn test_mm512_maskz_sllv_epi32() {
52607        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52608        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52609        let r = _mm512_maskz_sllv_epi32(0, a, count);
52610        assert_eq_m512i(r, _mm512_setzero_si512());
52611        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52612        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52613        assert_eq_m512i(r, e);
52614    }
52615
52616    #[simd_test(enable = "avx512f,avx512vl")]
52617    unsafe fn test_mm256_mask_sllv_epi32() {
52618        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52619        let count = _mm256_set1_epi32(1);
52620        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52621        assert_eq_m256i(r, a);
52622        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52623        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52624        assert_eq_m256i(r, e);
52625    }
52626
52627    #[simd_test(enable = "avx512f,avx512vl")]
52628    unsafe fn test_mm256_maskz_sllv_epi32() {
52629        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52630        let count = _mm256_set1_epi32(1);
52631        let r = _mm256_maskz_sllv_epi32(0, a, count);
52632        assert_eq_m256i(r, _mm256_setzero_si256());
52633        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52634        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52635        assert_eq_m256i(r, e);
52636    }
52637
52638    #[simd_test(enable = "avx512f,avx512vl")]
52639    unsafe fn test_mm_mask_sllv_epi32() {
52640        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52641        let count = _mm_set1_epi32(1);
52642        let r = _mm_mask_sllv_epi32(a, 0, a, count);
52643        assert_eq_m128i(r, a);
52644        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52645        let e = _mm_set_epi32(0, 2, 2, 2);
52646        assert_eq_m128i(r, e);
52647    }
52648
52649    #[simd_test(enable = "avx512f,avx512vl")]
52650    unsafe fn test_mm_maskz_sllv_epi32() {
52651        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52652        let count = _mm_set1_epi32(1);
52653        let r = _mm_maskz_sllv_epi32(0, a, count);
52654        assert_eq_m128i(r, _mm_setzero_si128());
52655        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52656        let e = _mm_set_epi32(0, 2, 2, 2);
52657        assert_eq_m128i(r, e);
52658    }
52659
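    // `sllv` above and `srlv` below likewise shift each element by its own
    // count, but as logical shifts they do not wrap: any count of 32 or more
    // yields 0. A scalar sketch of one `srlv` element (illustrative only):
    #[allow(dead_code)]
    fn srlv32_ref(x: u32, n: u32) -> u32 {
        // Out-of-range counts zero the element instead of wrapping like a rotate.
        if n < 32 { x >> n } else { 0 }
    }
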
52660    #[simd_test(enable = "avx512f")]
52661    unsafe fn test_mm512_srlv_epi32() {
52662        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52663        let count = _mm512_set1_epi32(1);
52664        let r = _mm512_srlv_epi32(a, count);
52665        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52666        assert_eq_m512i(r, e);
52667    }
52668
52669    #[simd_test(enable = "avx512f")]
52670    unsafe fn test_mm512_mask_srlv_epi32() {
52671        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52672        let count = _mm512_set1_epi32(1);
52673        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52674        assert_eq_m512i(r, a);
52675        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52676        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52677        assert_eq_m512i(r, e);
52678    }
52679
52680    #[simd_test(enable = "avx512f")]
52681    unsafe fn test_mm512_maskz_srlv_epi32() {
52682        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52683        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52684        let r = _mm512_maskz_srlv_epi32(0, a, count);
52685        assert_eq_m512i(r, _mm512_setzero_si512());
52686        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52687        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52688        assert_eq_m512i(r, e);
52689    }
52690
52691    #[simd_test(enable = "avx512f,avx512vl")]
52692    unsafe fn test_mm256_mask_srlv_epi32() {
52693        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52694        let count = _mm256_set1_epi32(1);
52695        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52696        assert_eq_m256i(r, a);
52697        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52698        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52699        assert_eq_m256i(r, e);
52700    }
52701
52702    #[simd_test(enable = "avx512f,avx512vl")]
52703    unsafe fn test_mm256_maskz_srlv_epi32() {
52704        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52705        let count = _mm256_set1_epi32(1);
52706        let r = _mm256_maskz_srlv_epi32(0, a, count);
52707        assert_eq_m256i(r, _mm256_setzero_si256());
52708        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52709        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52710        assert_eq_m256i(r, e);
52711    }
52712
52713    #[simd_test(enable = "avx512f,avx512vl")]
52714    unsafe fn test_mm_mask_srlv_epi32() {
52715        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52716        let count = _mm_set1_epi32(1);
52717        let r = _mm_mask_srlv_epi32(a, 0, a, count);
52718        assert_eq_m128i(r, a);
52719        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52720        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52721        assert_eq_m128i(r, e);
52722    }
52723
52724    #[simd_test(enable = "avx512f,avx512vl")]
52725    unsafe fn test_mm_maskz_srlv_epi32() {
52726        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52727        let count = _mm_set1_epi32(1);
52728        let r = _mm_maskz_srlv_epi32(0, a, count);
52729        assert_eq_m128i(r, _mm_setzero_si128());
52730        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52731        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52732        assert_eq_m128i(r, e);
52733    }
52734
52735    #[simd_test(enable = "avx512f")]
52736    unsafe fn test_mm512_sll_epi32() {
52737        #[rustfmt::skip]
52738        let a = _mm512_set_epi32(
52739            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52740            0, 0, 0, 0,
52741            0, 0, 0, 0,
52742            0, 0, 0, 0,
52743        );
52744        let count = _mm_set_epi32(0, 0, 0, 2);
52745        let r = _mm512_sll_epi32(a, count);
52746        #[rustfmt::skip]
52747        let e = _mm512_set_epi32(
52748            0, 1 << 2, 1 << 3, 1 << 4,
52749            0, 0, 0, 0,
52750            0, 0, 0, 0,
52751            0, 0, 0, 0,
52752        );
52753        assert_eq_m512i(r, e);
52754    }
52755
52756    #[simd_test(enable = "avx512f")]
52757    unsafe fn test_mm512_mask_sll_epi32() {
52758        #[rustfmt::skip]
52759        let a = _mm512_set_epi32(
52760            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52761            0, 0, 0, 0,
52762            0, 0, 0, 0,
52763            0, 0, 0, 0,
52764        );
52765        let count = _mm_set_epi32(0, 0, 0, 2);
52766        let r = _mm512_mask_sll_epi32(a, 0, a, count);
52767        assert_eq_m512i(r, a);
52768        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52769        #[rustfmt::skip]
52770        let e = _mm512_set_epi32(
52771            0, 1 << 2, 1 << 3, 1 << 4,
52772            0, 0, 0, 0,
52773            0, 0, 0, 0,
52774            0, 0, 0, 0,
52775        );
52776        assert_eq_m512i(r, e);
52777    }
52778
52779    #[simd_test(enable = "avx512f")]
52780    unsafe fn test_mm512_maskz_sll_epi32() {
52781        #[rustfmt::skip]
52782        let a = _mm512_set_epi32(
52783            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52784            0, 0, 0, 0,
52785            0, 0, 0, 0,
52786            0, 0, 0, 1 << 31,
52787        );
52788        let count = _mm_set_epi32(2, 0, 0, 2);
52789        let r = _mm512_maskz_sll_epi32(0, a, count);
52790        assert_eq_m512i(r, _mm512_setzero_si512());
52791        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52792        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52793        assert_eq_m512i(r, e);
52794    }
52795
52796    #[simd_test(enable = "avx512f,avx512vl")]
52797    unsafe fn test_mm256_mask_sll_epi32() {
52798        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52799        let count = _mm_set_epi32(0, 0, 0, 1);
52800        let r = _mm256_mask_sll_epi32(a, 0, a, count);
52801        assert_eq_m256i(r, a);
52802        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52803        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52804        assert_eq_m256i(r, e);
52805    }
52806
52807    #[simd_test(enable = "avx512f,avx512vl")]
52808    unsafe fn test_mm256_maskz_sll_epi32() {
52809        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52810        let count = _mm_set_epi32(0, 0, 0, 1);
52811        let r = _mm256_maskz_sll_epi32(0, a, count);
52812        assert_eq_m256i(r, _mm256_setzero_si256());
52813        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52814        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52815        assert_eq_m256i(r, e);
52816    }
52817
52818    #[simd_test(enable = "avx512f,avx512vl")]
52819    unsafe fn test_mm_mask_sll_epi32() {
52820        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52821        let count = _mm_set_epi32(0, 0, 0, 1);
52822        let r = _mm_mask_sll_epi32(a, 0, a, count);
52823        assert_eq_m128i(r, a);
52824        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52825        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52826        assert_eq_m128i(r, e);
52827    }
52828
52829    #[simd_test(enable = "avx512f,avx512vl")]
52830    unsafe fn test_mm_maskz_sll_epi32() {
52831        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52832        let count = _mm_set_epi32(0, 0, 0, 1);
52833        let r = _mm_maskz_sll_epi32(0, a, count);
52834        assert_eq_m128i(r, _mm_setzero_si128());
52835        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52836        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52837        assert_eq_m128i(r, e);
52838    }
52839
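    // In the `sll`/`srl`/`sra` forms the count is not per element: every
    // element shifts by the unsigned value in the low 64 bits of the `count`
    // vector, and the two upper 32-bit elements of `count` are ignored. Thus
    // `_mm_set_epi32(0, 0, 0, 2)` and `_mm_set_epi32(2, 0, 0, 2)` both mean
    // "shift everything by 2", and a count above 31 zeroes all elements (for
    // the arithmetic `sra`, it fills them with the sign bit instead).
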
52840    #[simd_test(enable = "avx512f")]
52841    unsafe fn test_mm512_srl_epi32() {
52842        #[rustfmt::skip]
52843        let a = _mm512_set_epi32(
52844            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52845            0, 0, 0, 0,
52846            0, 0, 0, 0,
52847            0, 0, 0, 0,
52848        );
52849        let count = _mm_set_epi32(0, 0, 0, 2);
52850        let r = _mm512_srl_epi32(a, count);
52851        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52852        assert_eq_m512i(r, e);
52853    }
52854
52855    #[simd_test(enable = "avx512f")]
52856    unsafe fn test_mm512_mask_srl_epi32() {
52857        #[rustfmt::skip]
52858        let a = _mm512_set_epi32(
52859            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52860            0, 0, 0, 0,
52861            0, 0, 0, 0,
52862            0, 0, 0, 0,
52863        );
52864        let count = _mm_set_epi32(0, 0, 0, 2);
52865        let r = _mm512_mask_srl_epi32(a, 0, a, count);
52866        assert_eq_m512i(r, a);
52867        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52868        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52869        assert_eq_m512i(r, e);
52870    }
52871
52872    #[simd_test(enable = "avx512f")]
52873    unsafe fn test_mm512_maskz_srl_epi32() {
52874        #[rustfmt::skip]
52875        let a = _mm512_set_epi32(
52876            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52877            0, 0, 0, 0,
52878            0, 0, 0, 0,
52879            0, 0, 0, 1 << 31,
52880        );
52881        let count = _mm_set_epi32(2, 0, 0, 2);
52882        let r = _mm512_maskz_srl_epi32(0, a, count);
52883        assert_eq_m512i(r, _mm512_setzero_si512());
52884        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52885        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52886        assert_eq_m512i(r, e);
52887    }
52888
52889    #[simd_test(enable = "avx512f,avx512vl")]
52890    unsafe fn test_mm256_mask_srl_epi32() {
52891        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52892        let count = _mm_set_epi32(0, 0, 0, 1);
52893        let r = _mm256_mask_srl_epi32(a, 0, a, count);
52894        assert_eq_m256i(r, a);
52895        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52896        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52897        assert_eq_m256i(r, e);
52898    }
52899
52900    #[simd_test(enable = "avx512f,avx512vl")]
52901    unsafe fn test_mm256_maskz_srl_epi32() {
52902        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52903        let count = _mm_set_epi32(0, 0, 0, 1);
52904        let r = _mm256_maskz_srl_epi32(0, a, count);
52905        assert_eq_m256i(r, _mm256_setzero_si256());
52906        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52907        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52908        assert_eq_m256i(r, e);
52909    }
52910
52911    #[simd_test(enable = "avx512f,avx512vl")]
52912    unsafe fn test_mm_mask_srl_epi32() {
52913        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52914        let count = _mm_set_epi32(0, 0, 0, 1);
52915        let r = _mm_mask_srl_epi32(a, 0, a, count);
52916        assert_eq_m128i(r, a);
52917        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52918        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52919        assert_eq_m128i(r, e);
52920    }
52921
52922    #[simd_test(enable = "avx512f,avx512vl")]
52923    unsafe fn test_mm_maskz_srl_epi32() {
52924        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52925        let count = _mm_set_epi32(0, 0, 0, 1);
52926        let r = _mm_maskz_srl_epi32(0, a, count);
52927        assert_eq_m128i(r, _mm_setzero_si128());
52928        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52929        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52930        assert_eq_m128i(r, e);
52931    }
52932
52933    #[simd_test(enable = "avx512f")]
52934    unsafe fn test_mm512_sra_epi32() {
52935        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52936        let count = _mm_set_epi32(1, 0, 0, 2);
52937        let r = _mm512_sra_epi32(a, count);
52938        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52939        assert_eq_m512i(r, e);
52940    }
52941
52942    #[simd_test(enable = "avx512f")]
52943    unsafe fn test_mm512_mask_sra_epi32() {
52944        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52945        let count = _mm_set_epi32(0, 0, 0, 2);
52946        let r = _mm512_mask_sra_epi32(a, 0, a, count);
52947        assert_eq_m512i(r, a);
52948        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
52949        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
52950        assert_eq_m512i(r, e);
52951    }
52952
52953    #[simd_test(enable = "avx512f")]
52954    unsafe fn test_mm512_maskz_sra_epi32() {
52955        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
52956        let count = _mm_set_epi32(2, 0, 0, 2);
52957        let r = _mm512_maskz_sra_epi32(0, a, count);
52958        assert_eq_m512i(r, _mm512_setzero_si512());
52959        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
52960        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
52961        assert_eq_m512i(r, e);
52962    }
52963
52964    #[simd_test(enable = "avx512f,avx512vl")]
52965    unsafe fn test_mm256_mask_sra_epi32() {
52966        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52967        let count = _mm_set_epi32(0, 0, 0, 1);
52968        let r = _mm256_mask_sra_epi32(a, 0, a, count);
52969        assert_eq_m256i(r, a);
52970        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
52971        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52972        assert_eq_m256i(r, e);
52973    }
52974
52975    #[simd_test(enable = "avx512f,avx512vl")]
52976    unsafe fn test_mm256_maskz_sra_epi32() {
52977        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52978        let count = _mm_set_epi32(0, 0, 0, 1);
52979        let r = _mm256_maskz_sra_epi32(0, a, count);
52980        assert_eq_m256i(r, _mm256_setzero_si256());
52981        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
52982        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52983        assert_eq_m256i(r, e);
52984    }
52985
52986    #[simd_test(enable = "avx512f,avx512vl")]
52987    unsafe fn test_mm_mask_sra_epi32() {
52988        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52989        let count = _mm_set_epi32(0, 0, 0, 1);
52990        let r = _mm_mask_sra_epi32(a, 0, a, count);
52991        assert_eq_m128i(r, a);
52992        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
52993        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52994        assert_eq_m128i(r, e);
52995    }
52996
52997    #[simd_test(enable = "avx512f,avx512vl")]
52998    unsafe fn test_mm_maskz_sra_epi32() {
52999        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53000        let count = _mm_set_epi32(0, 0, 0, 1);
53001        let r = _mm_maskz_sra_epi32(0, a, count);
53002        assert_eq_m128i(r, _mm_setzero_si128());
53003        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53004        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53005        assert_eq_m128i(r, e);
53006    }
53007
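    // Arithmetic right shifts replicate the sign bit, matching `>>` on `i32`:
    // -8 >> 2 == -2 and -15 >> 2 == -4, rounding toward negative infinity. A
    // scalar sketch of one element, assuming the saturating count behavior
    // described in Intel's pseudocode (illustrative only):
    #[allow(dead_code)]
    fn sra32_ref(x: i32, n: u32) -> i32 {
        // Counts past 31 saturate to an all-sign-bits result (0 or -1).
        if n < 32 { x >> n } else { x >> 31 }
    }
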
53008    #[simd_test(enable = "avx512f")]
53009    unsafe fn test_mm512_srav_epi32() {
53010        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53011        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53012        let r = _mm512_srav_epi32(a, count);
53013        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53014        assert_eq_m512i(r, e);
53015    }
53016
53017    #[simd_test(enable = "avx512f")]
53018    unsafe fn test_mm512_mask_srav_epi32() {
53019        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53020        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53021        let r = _mm512_mask_srav_epi32(a, 0, a, count);
53022        assert_eq_m512i(r, a);
53023        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53024        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53025        assert_eq_m512i(r, e);
53026    }
53027
53028    #[simd_test(enable = "avx512f")]
53029    unsafe fn test_mm512_maskz_srav_epi32() {
53030        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53031        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53032        let r = _mm512_maskz_srav_epi32(0, a, count);
53033        assert_eq_m512i(r, _mm512_setzero_si512());
53034        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53035        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53036        assert_eq_m512i(r, e);
53037    }
53038
53039    #[simd_test(enable = "avx512f,avx512vl")]
53040    unsafe fn test_mm256_mask_srav_epi32() {
53041        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53042        let count = _mm256_set1_epi32(1);
53043        let r = _mm256_mask_srav_epi32(a, 0, a, count);
53044        assert_eq_m256i(r, a);
53045        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53046        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53047        assert_eq_m256i(r, e);
53048    }
53049
53050    #[simd_test(enable = "avx512f,avx512vl")]
53051    unsafe fn test_mm256_maskz_srav_epi32() {
53052        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53053        let count = _mm256_set1_epi32(1);
53054        let r = _mm256_maskz_srav_epi32(0, a, count);
53055        assert_eq_m256i(r, _mm256_setzero_si256());
53056        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53057        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53058        assert_eq_m256i(r, e);
53059    }
53060
53061    #[simd_test(enable = "avx512f,avx512vl")]
53062    unsafe fn test_mm_mask_srav_epi32() {
53063        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53064        let count = _mm_set1_epi32(1);
53065        let r = _mm_mask_srav_epi32(a, 0, a, count);
53066        assert_eq_m128i(r, a);
53067        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53068        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53069        assert_eq_m128i(r, e);
53070    }
53071
53072    #[simd_test(enable = "avx512f,avx512vl")]
53073    unsafe fn test_mm_maskz_srav_epi32() {
53074        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53075        let count = _mm_set1_epi32(1);
53076        let r = _mm_maskz_srav_epi32(0, a, count);
53077        assert_eq_m128i(r, _mm_setzero_si128());
53078        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53079        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53080        assert_eq_m128i(r, e);
53081    }
53082
53083    #[simd_test(enable = "avx512f")]
53084    unsafe fn test_mm512_srai_epi32() {
53085        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53086        let r = _mm512_srai_epi32::<2>(a);
53087        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53088        assert_eq_m512i(r, e);
53089    }
53090
53091    #[simd_test(enable = "avx512f")]
53092    unsafe fn test_mm512_mask_srai_epi32() {
53093        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53094        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53095        assert_eq_m512i(r, a);
53096        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53097        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53098        assert_eq_m512i(r, e);
53099    }
53100
53101    #[simd_test(enable = "avx512f")]
53102    unsafe fn test_mm512_maskz_srai_epi32() {
53103        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53104        let r = _mm512_maskz_srai_epi32::<2>(0, a);
53105        assert_eq_m512i(r, _mm512_setzero_si512());
53106        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53107        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53108        assert_eq_m512i(r, e);
53109    }
53110
53111    #[simd_test(enable = "avx512f,avx512vl")]
53112    unsafe fn test_mm256_mask_srai_epi32() {
53113        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53114        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53115        assert_eq_m256i(r, a);
53116        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53117        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53118        assert_eq_m256i(r, e);
53119    }
53120
53121    #[simd_test(enable = "avx512f,avx512vl")]
53122    unsafe fn test_mm256_maskz_srai_epi32() {
53123        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53124        let r = _mm256_maskz_srai_epi32::<1>(0, a);
53125        assert_eq_m256i(r, _mm256_setzero_si256());
53126        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53127        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53128        assert_eq_m256i(r, e);
53129    }
53130
53131    #[simd_test(enable = "avx512f,avx512vl")]
53132    unsafe fn test_mm_mask_srai_epi32() {
53133        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53134        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53135        assert_eq_m128i(r, a);
53136        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53137        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53138        assert_eq_m128i(r, e);
53139    }
53140
53141    #[simd_test(enable = "avx512f,avx512vl")]
53142    unsafe fn test_mm_maskz_srai_epi32() {
53143        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53144        let r = _mm_maskz_srai_epi32::<1>(0, a);
53145        assert_eq_m128i(r, _mm_setzero_si128());
53146        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53147        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53148        assert_eq_m128i(r, e);
53149    }
53150
53151    #[simd_test(enable = "avx512f")]
53152    unsafe fn test_mm512_permute_ps() {
53153        let a = _mm512_setr_ps(
53154            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53155        );
53156        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53157        let e = _mm512_setr_ps(
53158            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53159        );
53160        assert_eq_m512(r, e);
53161    }
53162
53163    #[simd_test(enable = "avx512f")]
53164    unsafe fn test_mm512_mask_permute_ps() {
53165        let a = _mm512_setr_ps(
53166            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53167        );
53168        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53169        assert_eq_m512(r, a);
53170        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53171        let e = _mm512_setr_ps(
53172            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53173        );
53174        assert_eq_m512(r, e);
53175    }
53176
53177    #[simd_test(enable = "avx512f")]
53178    unsafe fn test_mm512_maskz_permute_ps() {
53179        let a = _mm512_setr_ps(
53180            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53181        );
53182        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53183        assert_eq_m512(r, _mm512_setzero_ps());
53184        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53185        let e = _mm512_setr_ps(
53186            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53187        );
53188        assert_eq_m512(r, e);
53189    }
53190
53191    #[simd_test(enable = "avx512f,avx512vl")]
53192    unsafe fn test_mm256_mask_permute_ps() {
53193        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53194        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53195        assert_eq_m256(r, a);
53196        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53197        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53198        assert_eq_m256(r, e);
53199    }
53200
53201    #[simd_test(enable = "avx512f,avx512vl")]
53202    unsafe fn test_mm256_maskz_permute_ps() {
53203        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53204        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53205        assert_eq_m256(r, _mm256_setzero_ps());
53206        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53207        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53208        assert_eq_m256(r, e);
53209    }
53210
53211    #[simd_test(enable = "avx512f,avx512vl")]
53212    unsafe fn test_mm_mask_permute_ps() {
53213        let a = _mm_set_ps(0., 1., 2., 3.);
53214        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53215        assert_eq_m128(r, a);
53216        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53217        let e = _mm_set_ps(0., 0., 0., 0.);
53218        assert_eq_m128(r, e);
53219    }
53220
53221    #[simd_test(enable = "avx512f,avx512vl")]
53222    unsafe fn test_mm_maskz_permute_ps() {
53223        let a = _mm_set_ps(0., 1., 2., 3.);
53224        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53225        assert_eq_m128(r, _mm_setzero_ps());
53226        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53227        let e = _mm_set_ps(0., 0., 0., 0.);
53228        assert_eq_m128(r, e);
53229    }
53230
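    // `permute_ps` selects within each 128-bit lane using four 2-bit fields of
    // the immediate (here all 0b11, i.e. lane element 3). Since `set_ps` lists
    // elements high-to-low, lane element 3 of the low lane in the 256-bit test
    // is 4. and of the high lane is 0., hence the expected
    // `(0., 0., 0., 0., 4., 4., 4., 4.)`.
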
53231    #[simd_test(enable = "avx512f")]
53232    unsafe fn test_mm512_permutevar_epi32() {
53233        let idx = _mm512_set1_epi32(1);
53234        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53235        let r = _mm512_permutevar_epi32(idx, a);
53236        let e = _mm512_set1_epi32(14);
53237        assert_eq_m512i(r, e);
53238    }
53239
53240    #[simd_test(enable = "avx512f")]
53241    unsafe fn test_mm512_mask_permutevar_epi32() {
53242        let idx = _mm512_set1_epi32(1);
53243        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53244        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53245        assert_eq_m512i(r, a);
53246        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53247        let e = _mm512_set1_epi32(14);
53248        assert_eq_m512i(r, e);
53249    }
53250
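    // Despite its name, `_mm512_permutevar_epi32` (vpermd) indexes across the
    // whole vector: result element i is `a[idx[i] % 16]`. With every index
    // equal to 1, each result element becomes vector element 1, which is 14
    // here because `set_epi32` lists elements from index 15 down to index 0.
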
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_permutevar_ps(a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_maskz_permutevar_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_maskz_permutevar_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_maskz_permutevar_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }

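    // Unlike `permutevar_ps` above, the `permutexvar` family permutes across
    // the full vector width, so any element can come from any source position.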
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutexvar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_permutexvar_epi32(idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permutexvar_ps(idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_permutexvar_ps(idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

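    // `permutex2var` selects from the concatenation of two vectors: with 16
    // 32-bit elements, bit 4 of each index picks the second source, so indices
    // of the form `1 << 4` select from `b` (all 100s) while small indices
    // select from `a`. The narrower variants use bit 3 (8 elements) and
    // bit 2 (4 elements) respectively.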
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_permutex2var_epi32(a, idx, b);
        let e = _mm512_set_epi32(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_epi32(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask2_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            10, 100, 9, 100,
            8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_permutex2var_epi32(a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_permutex2var_epi32(a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_permutex2var_ps(a, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask2_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m512(r, _mm512_castsi512_ps(idx));
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_permutex2var_ps(a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m256(r, _mm256_castsi256_ps(idx));
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_permutex2var_ps(a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m128(r, _mm_castsi128_ps(idx));
        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

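    // `_MM_PERM_AADD` encodes the per-lane selectors 0b00_00_11_11: within
    // each 128-bit lane, the two low destination elements take source
    // element 3 and the two high ones take source element 0.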
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }

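    // For `shuffle_ps`, each 128-bit lane of the result takes its two low
    // elements from `a` and its two high elements from `b`, two selector bits
    // per element; 0b00_00_11_11 picks a[3], a[3], b[0], b[0] in every lane.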
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }

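    // The `shuffle_{i,f}32x4` intrinsics shuffle whole 128-bit lanes rather
    // than elements: two selector bits per destination lane, with the lower
    // half of the result drawn from `a` and the upper half from `b`.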
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

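    // `extract{f,i}32x4` pulls out one 128-bit lane; index 1 selects
    // elements 4..=7. The masked forms then blend or zero within the
    // extracted 128-bit result.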
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_extractf32x4_ps::<1>(a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let src = _mm_set1_ps(100.);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
        let e = _mm_setr_ps(5., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_extractf32x4_ps::<1>(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let src = _mm_set1_ps(100.);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_extracti32x4_epi32::<1>(a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi32(100);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
        let e = _mm_setr_epi32(5, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_extracti32x4_epi32::<1>(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi32(100);
        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

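    // `moveldup` duplicates each even-indexed element into the odd slot above
    // it; `movehdup` (further below) duplicates each odd-indexed element into
    // the even slot below it.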
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_moveldup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_moveldup_ps(a);
        let e = _mm512_setr_ps(
            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_moveldup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_moveldup_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_moveldup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_moveldup_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_moveldup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_moveldup_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_moveldup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_moveldup_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_moveldup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_moveldup_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
        let e = _mm_set_ps(2., 2., 4., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_moveldup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_moveldup_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_moveldup_ps(0b00001111, a);
        let e = _mm_set_ps(2., 2., 4., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_movehdup_ps(a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_movehdup_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_movehdup_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_movehdup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_movehdup_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_movehdup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_movehdup_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_movehdup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_movehdup_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
        let e = _mm_set_ps(1., 1., 3., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_movehdup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_movehdup_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_movehdup_ps(0b00001111, a);
        let e = _mm_set_ps(1., 1., 3., 3.);
        assert_eq_m128(r, e);
    }

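    // `insert{i,f}32x4` replaces the selected 128-bit lane of `a` with `b`,
    // leaving the other lanes unchanged.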
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_inserti32x4::<0>(a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_inserti32x4::<1>(a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_insertf32x4::<0>(a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_insertf32x4::<1>(a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

54541    #[simd_test(enable = "avx512f")]
54542    unsafe fn test_mm512_castps128_ps512() {
54543        let a = _mm_setr_ps(17., 18., 19., 20.);
54544        let r = _mm512_castps128_ps512(a);
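        // The upper 384 bits of the cast result are undefined, so the test
        // only round-trips the low 128 bits instead of comparing full vectors.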
        assert_eq_m128(_mm512_castps512_ps128(r), a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_castps256_ps512() {
        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_castps256_ps512(a);
        assert_eq_m256(_mm512_castps512_ps256(r), a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_zextps128_ps512() {
        let a = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_zextps128_ps512(a);
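        // Unlike `_mm512_castps128_ps512`, the zero-extending variant
        // guarantees the upper elements are zero, so a full compare is valid.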
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_zextps256_ps512() {
        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_zextps256_ps512(a);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_castps512_ps128() {
        let a = _mm512_setr_ps(
            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        let r = _mm512_castps512_ps128(a);
        let e = _mm_setr_ps(17., 18., 19., 20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_castps512_ps256() {
        let a = _mm512_setr_ps(
            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        let r = _mm512_castps512_ps256(a);
        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_castps_pd() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_castps_pd(a);
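        // Pure bit reinterpretation: each f64 lane holds two f32 1.0 values
        // (0x3F80_0000 each), i.e. the bit pattern 0x3F80_0000_3F80_0000,
        // which reads back as roughly 0.0078125 in f64.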
        let e = _mm512_set1_pd(0.007812501848093234);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_castps_si512() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_castps_si512(a);
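        // 1065353216 == 0x3F80_0000, the IEEE-754 bit pattern of 1.0_f32.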
        let e = _mm512_set1_epi32(1065353216);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
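        // `_mm_set_epi32` takes arguments high-to-low, so element 0 (the lane
        // that gets broadcast) is 20.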
        let r = _mm512_broadcastd_epi32(a);
        let e = _mm512_set1_epi32(20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_broadcastd_epi32() {
        let src = _mm512_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_broadcastd_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_broadcastd_epi32() {
        let src = _mm256_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_broadcastd_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_broadcastd_epi32() {
        let src = _mm_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(20);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_broadcastd_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
        let e = _mm_set1_epi32(20);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_broadcastss_ps(a);
        let e = _mm512_set1_ps(20.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_broadcastss_ps() {
        let src = _mm512_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_mask_broadcastss_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
        let e = _mm512_set1_ps(20.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_broadcastss_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_broadcastss_ps() {
        let src = _mm256_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_broadcastss_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
        let e = _mm256_set1_ps(20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_broadcastss_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
        let e = _mm256_set1_ps(20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_broadcastss_ps() {
        let src = _mm_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_broadcastss_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
        let e = _mm_set1_ps(20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_broadcastss_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
        let e = _mm_set1_ps(20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_broadcast_i32x4(a);
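        // The 128-bit source is repeated into each of the four 128-bit lanes.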
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_broadcast_i32x4() {
        let src = _mm512_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_broadcast_i32x4(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_broadcast_i32x4(a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_broadcast_i32x4() {
        let src = _mm256_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_broadcast_i32x4(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_broadcast_f32x4(a);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_broadcast_f32x4() {
        let src = _mm512_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_broadcast_f32x4(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_broadcast_f32x4(a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_broadcast_f32x4() {
        let src = _mm256_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_broadcast_f32x4(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_blend_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
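        // Mask bit i selects element i from `b` when set and from `a` when
        // clear, so the upper eight elements come from `b`.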
        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_blend_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_blend_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_blend_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_blend_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_mask_blend_ps(0b11111111, a, b);
        let e = _mm256_set1_ps(2.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_blend_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let r = _mm_mask_blend_ps(0b00001111, a, b);
        let e = _mm_set1_ps(2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpackhi_epi32(a, b);
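        // Unpacking works per 128-bit lane: the high two elements of each lane
        // of `a` and `b` are interleaved, so the result is not a simple
        // interleave of the full vectors.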
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpackhi_ps(a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpackhi_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpackhi_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpackhi_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpacklo_epi32(a, b);
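        // Same per-lane behaviour as `unpackhi`, but interleaving the low two
        // elements of each 128-bit lane.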
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpacklo_ps(a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpacklo_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpacklo_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpacklo_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32::<0>(a, b);
        assert_eq_m512i(r, b);
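        // `alignr` concatenates `a:b`, shifts right by IMM8 elements and keeps
        // the low 16; the count is taken modulo the element count, so 16 wraps
        // back to 0 and returns `b` again.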
        let r = _mm512_alignr_epi32::<16>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<1>(a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_and_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_and_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_and_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_and_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_and_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_or_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_or_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_or_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_or_epi32(a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_or_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_or_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_or_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_xor_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_xor_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_xor_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_xor_epi32(a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_xor_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_xor_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_andnot_epi32() {
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
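        // `andnot` computes `!a & b`, so with `a == 0` every bit of `b`
        // survives.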
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_maskz_andnot_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_maskz_andnot_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_cvtmask16_u32() {
        let a: __mmask16 = 0b11001100_00110011;
        let r = _cvtmask16_u32(a);
55961        let e: u32 = 0b11001100_00110011;
55962        assert_eq!(r, e);
55963    }
55964
55965    #[simd_test(enable = "avx512f")]
55966    unsafe fn test_cvtu32_mask16() {
55967        let a: u32 = 0b11001100_00110011;
55968        let r = _cvtu32_mask16(a);
55969        let e: __mmask16 = 0b11001100_00110011;
55970        assert_eq!(r, e);
55971    }
55972
55973    #[simd_test(enable = "avx512f")]
55974    unsafe fn test_mm512_kand() {
55975        let a: u16 = 0b11001100_00110011;
55976        let b: u16 = 0b11001100_00110011;
55977        let r = _mm512_kand(a, b);
55978        let e: u16 = 0b11001100_00110011;
55979        assert_eq!(r, e);
55980    }
55981
55982    #[simd_test(enable = "avx512f")]
55983    unsafe fn test_kand_mask16() {
55984        let a: u16 = 0b11001100_00110011;
55985        let b: u16 = 0b11001100_00110011;
55986        let r = _kand_mask16(a, b);
55987        let e: u16 = 0b11001100_00110011;
55988        assert_eq!(r, e);
55989    }
55990
55991    #[simd_test(enable = "avx512f")]
55992    unsafe fn test_mm512_kor() {
55993        let a: u16 = 0b11001100_00110011;
55994        let b: u16 = 0b00101110_00001011;
55995        let r = _mm512_kor(a, b);
55996        let e: u16 = 0b11101110_00111011;
55997        assert_eq!(r, e);
55998    }
55999
56000    #[simd_test(enable = "avx512f")]
56001    unsafe fn test_kor_mask16() {
56002        let a: u16 = 0b11001100_00110011;
56003        let b: u16 = 0b00101110_00001011;
56004        let r = _kor_mask16(a, b);
56005        let e: u16 = 0b11101110_00111011;
56006        assert_eq!(r, e);
56007    }
56008
56009    #[simd_test(enable = "avx512f")]
56010    unsafe fn test_mm512_kxor() {
56011        let a: u16 = 0b11001100_00110011;
56012        let b: u16 = 0b00101110_00001011;
56013        let r = _mm512_kxor(a, b);
56014        let e: u16 = 0b11100010_00111000;
56015        assert_eq!(r, e);
56016    }
56017
56018    #[simd_test(enable = "avx512f")]
56019    unsafe fn test_kxor_mask16() {
56020        let a: u16 = 0b11001100_00110011;
56021        let b: u16 = 0b00101110_00001011;
56022        let r = _kxor_mask16(a, b);
56023        let e: u16 = 0b11100010_00111000;
56024        assert_eq!(r, e);
56025    }
56026
56027    #[simd_test(enable = "avx512f")]
56028    unsafe fn test_mm512_knot() {
56029        let a: u16 = 0b11001100_00110011;
56030        let r = _mm512_knot(a);
56031        let e: u16 = 0b00110011_11001100;
56032        assert_eq!(r, e);
56033    }
56034
56035    #[simd_test(enable = "avx512f")]
56036    unsafe fn test_knot_mask16() {
56037        let a: u16 = 0b11001100_00110011;
56038        let r = _knot_mask16(a);
56039        let e: u16 = 0b00110011_11001100;
56040        assert_eq!(r, e);
56041    }
56042
56043    #[simd_test(enable = "avx512f")]
56044    unsafe fn test_mm512_kandn() {
56045        let a: u16 = 0b11001100_00110011;
56046        let b: u16 = 0b00101110_00001011;
56047        let r = _mm512_kandn(a, b);
56048        let e: u16 = 0b00100010_00001000;
56049        assert_eq!(r, e);
56050    }
56051
56052    #[simd_test(enable = "avx512f")]
56053    unsafe fn test_kandn_mask16() {
56054        let a: u16 = 0b11001100_00110011;
56055        let b: u16 = 0b00101110_00001011;
56056        let r = _kandn_mask16(a, b);
56057        let e: u16 = 0b00100010_00001000;
56058        assert_eq!(r, e);
56059    }
56060
56061    #[simd_test(enable = "avx512f")]
56062    unsafe fn test_mm512_kxnor() {
56063        let a: u16 = 0b11001100_00110011;
56064        let b: u16 = 0b00101110_00001011;
56065        let r = _mm512_kxnor(a, b);
56066        let e: u16 = 0b00011101_11000111;
56067        assert_eq!(r, e);
56068    }
56069
56070    #[simd_test(enable = "avx512f")]
56071    unsafe fn test_kxnor_mask16() {
56072        let a: u16 = 0b11001100_00110011;
56073        let b: u16 = 0b00101110_00001011;
56074        let r = _kxnor_mask16(a, b);
56075        let e: u16 = 0b00011101_11000111;
56076        assert_eq!(r, e);
56077    }
56078
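    // A note on the kortest family below, per Intel's documentation: the OR
    // of the two masks sets the carry flag when it is all ones and the zero
    // flag when it is all zeros. `_kortest_mask16_u8` returns the zero-flag
    // result and writes the carry-flag result through the out-pointer.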
    #[simd_test(enable = "avx512f")]
    unsafe fn test_kortest_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask16_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kortestc_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestc_mask16_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kortestz_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestz_mask16_u8(a, b);
        assert_eq!(r, 0);
    }

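    // The shift count is a const generic; per the documented KSHIFTLW/KSHIFTRW
    // semantics, counts of 16 or more yield an all-zero mask rather than
    // wrapping, which the `<16>` and `<17>` cases below exercise.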
    #[simd_test(enable = "avx512f")]
    unsafe fn test_kshiftli_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = _kshiftli_mask16::<3>(a);
        let e: __mmask16 = 0b1011011000011000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<15>(a);
        let e: __mmask16 = 0b1000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kshiftri_mask16() {
        let a: __mmask16 = 0b1010100100111100;
        let r = _kshiftri_mask16::<3>(a);
        let e: __mmask16 = 0b0001010100100111;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<15>(a);
        let e: __mmask16 = 0b0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_load_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = _load_mask16(&a);
        let e: __mmask16 = 0b1001011011000011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_store_mask16() {
        let a: __mmask16 = 0b0110100100111100;
        let mut r = 0;
        _store_mask16(&mut r, a);
        let e: __mmask16 = 0b0110100100111100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kmov() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_kmov(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_int2mask() {
        let a: i32 = 0b11001100_00110011;
        let r = _mm512_int2mask(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask2int() {
        let k1: __mmask16 = 0b11001100_00110011;
        let r = _mm512_mask2int(k1);
        let e: i32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

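    // KUNPCKBW concatenates the low bytes of its operands: the low byte of
    // `a` forms the high byte of the result and the low byte of `b` forms
    // the low byte.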
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kunpackb() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kunpackb(a, b);
        let e: u16 = 0b00110011_00001011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kortestc() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 0);
        let b: u16 = 0b11111111_11111111;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kortestz() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestz(a, b);
        assert_eq!(r, 0);
        let r = _mm512_kortestz(0, 0);
        assert_eq!(r, 1);
    }

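    // The test/testn pairs below follow VPTESTMD/VPTESTNMD: result bit `i` is
    // set when `a[i] & b[i]` is non-zero (test) or zero (testn), additionally
    // gated by the writemask in the `mask_` variants.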
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_test_epi32_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_test_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_test_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi32_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 1);
        let r = _mm512_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

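    // The stream stores below are non-temporal and weakly ordered, so each
    // test issues `_mm_sfence` before reading the buffer back; they are
    // skipped under Miri.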
    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_ps() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f32; 16], // 64 bytes
        }
        let a = _mm512_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 16] };

        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
        _mm_sfence();
        for i in 0..16 {
            assert_eq!(mem.data[i], get_m512(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_pd() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f64; 8],
        }
        let a = _mm512_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512d(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_si512() {
        #[repr(align(64))]
        struct Memory {
            pub data: [i64; 8],
        }
        let a = _mm512_set1_epi32(7);
        let mut mem = Memory { data: [-1; 8] };

        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512i(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_stream_load_si512() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
        assert_eq_m512i(a, r);
    }

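    // For the masked reductions below, unselected lanes are assumed to
    // contribute the operation's identity (0 for add/or, 1 for mul, all ones
    // for and, the type's extremes for min/max), so a half-set mask reduces
    // only the surviving eight lanes.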
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_reduce_add_epi32(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_reduce_add_ps(a);
        assert_eq!(16., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
        assert_eq!(8., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_reduce_mul_epi32(a);
        assert_eq!(65536, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_reduce_mul_ps(a);
        assert_eq!(65536., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
        assert_eq!(256., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_max_epi32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_max_epu32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_max_ps(a);
        assert_eq!(15., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
        assert_eq!(7., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_min_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_min_epu32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_min_ps(a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_and_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_or_epi32(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

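    // Compress packs the mask-selected elements contiguously into the low
    // lanes of the destination; the remaining lanes come from `src` (mask
    // variant) or are zeroed (maskz variant).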
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compress_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_compress_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_compress_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_compress_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compress_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_compress_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_compress_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_compress_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_compress_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compress_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_compress_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 200, 1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_compress_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_compress_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_compress_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 0, 1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compress_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_compress_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_compress_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_compress_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compress_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_compress_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_compress_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_compress_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_compress_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compress_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_compress_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_compress_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 200., 1., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_compress_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_compress_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_compress_ps(0b00000101, a);
        let e = _mm_set_ps(0., 0., 1., 3.);
        assert_eq_m128(r, e);
    }

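    // compressstoreu writes only the selected elements, contiguously and
    // unaligned, to memory; bytes past the compressed run are left untouched,
    // which is why the tails of the destination arrays below stay zero.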
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let mut r = [0_i32; 16];
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 16]);
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i32; 8];
        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 8]);
        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = [0_i32; 4];
        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 4]);
        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1, 2, 4, 0]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_epi64() {
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i64; 8];
        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 8]);
        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi64() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = [0_i64; 4];
        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 4]);
        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1, 2, 4, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi64() {
        let a = _mm_setr_epi64x(1, 2);
        let mut r = [0_i64; 2];
        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 2]);
        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
        assert_eq!(&r, &[2, 0]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_ps() {
        let a = _mm512_setr_ps(
            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
            13_f32, 14_f32, 15_f32, 16_f32,
        );
        let mut r = [0_f32; 16];
        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_f32; 16]);
        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
        assert_eq!(
            &r,
            &[
                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
            ]
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_ps() {
        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
        let mut r = [0_f32; 8];
        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_f32; 8]);
        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(
            &r,
            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_ps() {
        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
        let mut r = [0.; 4];
        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 4]);
        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_pd() {
        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut r = [0.; 8];
        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 8]);
        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut r = [0.; 4];
        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 4]);
        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1., 2., 4., 0.]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_pd() {
        let a = _mm_setr_pd(1., 2.);
        let mut r = [0.; 2];
        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 2]);
        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
        assert_eq!(&r, &[2., 0.]);
    }

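    // Expand is the inverse of compress: the low lanes of `a` are read
    // contiguously and scattered to the mask-selected positions, with the
    // remaining lanes taken from `src` or zeroed.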
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expand_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_expand_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expand_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_expand_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expand_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_expand_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expand_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_expand_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_expand_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expand_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_expand_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 2, 200, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expand_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_expand_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_expand_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 2, 0, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expand_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_expand_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expand_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_expand_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expand_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_expand_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expand_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_expand_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_expand_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expand_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_expand_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_expand_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 2., 200., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expand_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_expand_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_expand_ps(0b00000101, a);
        let e = _mm_set_ps(0., 2., 0., 3.);
        assert_eq_m128(r, e);
    }

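    // The load tests route the pointer through `black_box` so the compiler
    // cannot see the constant data and fold the load away; the actual load
    // instruction is what gets exercised.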
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = _mm512_loadu_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
        let p = a.as_ptr();
        let r = _mm256_loadu_epi32(black_box(p));
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_loadu_epi32() {
        let a = &[4, 3, 2, 5];
        let p = a.as_ptr();
        let r = _mm_loadu_epi32(black_box(p));
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }

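    // The cvt*_storeu tests below narrow each 32-bit element before storing:
    // plain cvtepi32 truncates, cvtsepi32 saturates as signed and cvtusepi32
    // saturates as unsigned, so i32::MAX stores as i16::MAX/i8::MAX or
    // u16::MAX/u8::MAX respectively.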
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(9);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set1_epi16(9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set1_epi16(i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set1_epi16(u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        let e = _mm_set_epi16(
            0,
            0,
            0,
            0,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_storeu_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_storeu_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr().cast();
        let r = _mm512_loadu_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_si512(&mut r as *mut _, a);
        assert_eq_m512i(r, a);
    }

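    // Unlike the unaligned loads above, the aligned load/store intrinsics
    // require the pointer to be aligned to the vector width; the
    // #[repr(align(64))] wrappers below guarantee that.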
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr().cast();
        let r = _mm512_load_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_si512(&mut r as *mut _, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50],
        };
        let p = (a.data).as_ptr();
        let r = _mm256_load_epi32(black_box(p));
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 4],
        }
        let a = Align { data: [4, 3, 2, 5] };
        let p = (a.data).as_ptr();
        let r = _mm_load_epi32(black_box(p));
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_store_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_store_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_ps(black_box(p));
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        assert_eq_m512(r, a);
    }

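    // Mask convention exercised by the tests below: with mask 0 the result
    // keeps `src` (or is zeroed for the `maskz` variants), while an all-ones
    // mask applies the operation to every selected lane.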
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi32() {
        let src = _mm256_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm256_mask_set1_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm256_maskz_set1_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_set1_epi32() {
        let src = _mm_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm_mask_set1_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm_maskz_set1_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi32(0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }

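    // Scalar (`_ss`/`_sd`) masked operations only consult bit 0 of the mask:
    // it controls the lowest element, while the upper elements are always
    // copied from the first vector operand.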
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

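    // Worked example for the arithmetic tests below: with the mask bit set,
    // the low lane of `_mm_mask_add_ss` is 20. + 40. = 60.; with it clear,
    // the low lane falls back to the low lane of `src` (110.). The upper
    // lanes (1., 2., 10.) come from `a` either way.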
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_add_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_mul_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_div_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

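    // `rsqrt14`/`rcp14` are approximations with a maximum relative error of
    // 2^-14, so exact comparison would normally be unreliable; the low-lane
    // inputs here are powers of two (4.0), for which the approximation
    // returns the exact results 0.5 and 0.25.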
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rsqrt14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rsqrt14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rsqrt14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rsqrt14_sd(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rsqrt14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rsqrt14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rcp14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rcp14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rcp14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rcp14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rcp14_sd(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rcp14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rcp14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rcp14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

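    // `getexp` extracts the unbiased exponent of the low element as a float,
    // i.e. floor(log2(|x|)): getexp(3.0) == 1.0.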
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_ss(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_ss(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_ss(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_sd(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_sd(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_sd(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

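    // `getmant` with `_MM_MANT_NORM_1_2` normalizes the significand into
    // [1, 2), keeping the sign of the source: 10.0 == 1.25 * 2^3, so the low
    // element becomes 1.25.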
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

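    // `roundscale` with IMM8 == 0 rounds to the nearest integer (scale 2^0,
    // round-to-nearest-even), so 1.1 rounds to 1.0 in the low element.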
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_ss::<0>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_sd::<0>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

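    // `scalef` computes a * 2^floor(b) on the low element: 1.0 * 2^3 == 8.0.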
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_ss(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_ss(a, 0, a, b);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_scalef_ss(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_sd(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_sd(a, 0, a, b);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_scalef_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

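    // Fused multiply-add family: fmadd computes a*b + c, fmsub a*b - c,
    // fnmadd -(a*b) + c and fnmsub -(a*b) - c. When the mask bit is clear,
    // the `mask` variants fall back to the first operand, `mask3` to the
    // third, and `maskz` to zero.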
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }

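    // The `_round` variants take the rounding mode as a const generic;
    // `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` rounds toward zero with
    // exceptions suppressed. The operands below produce exactly representable
    // results, so the chosen mode does not change the expected values.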
58509    #[simd_test(enable = "avx512f")]
58510    unsafe fn test_mm_add_round_ss() {
58511        let a = _mm_set_ps(1., 2., 10., 20.);
58512        let b = _mm_set_ps(3., 4., 30., 40.);
58513        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58514        let e = _mm_set_ps(1., 2., 10., 60.);
58515        assert_eq_m128(r, e);
58516    }
58517
58518    #[simd_test(enable = "avx512f")]
58519    unsafe fn test_mm_mask_add_round_ss() {
58520        let src = _mm_set_ps(10., 11., 100., 110.);
58521        let a = _mm_set_ps(1., 2., 10., 20.);
58522        let b = _mm_set_ps(3., 4., 30., 40.);
58523        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58524        let e = _mm_set_ps(1., 2., 10., 110.);
58525        assert_eq_m128(r, e);
58526        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58527            src, 0b11111111, a, b,
58528        );
58529        let e = _mm_set_ps(1., 2., 10., 60.);
58530        assert_eq_m128(r, e);
58531    }
58532
58533    #[simd_test(enable = "avx512f")]
58534    unsafe fn test_mm_maskz_add_round_ss() {
58535        let a = _mm_set_ps(1., 2., 10., 20.);
58536        let b = _mm_set_ps(3., 4., 30., 40.);
58537        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58538        let e = _mm_set_ps(1., 2., 10., 0.);
58539        assert_eq_m128(r, e);
58540        let r =
58541            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58542        let e = _mm_set_ps(1., 2., 10., 60.);
58543        assert_eq_m128(r, e);
58544    }
58545
58546    #[simd_test(enable = "avx512f")]
58547    unsafe fn test_mm_add_round_sd() {
58548        let a = _mm_set_pd(1., 2.);
58549        let b = _mm_set_pd(3., 4.);
58550        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58551        let e = _mm_set_pd(1., 6.);
58552        assert_eq_m128d(r, e);
58553    }
58554
58555    #[simd_test(enable = "avx512f")]
58556    unsafe fn test_mm_mask_add_round_sd() {
58557        let src = _mm_set_pd(10., 11.);
58558        let a = _mm_set_pd(1., 2.);
58559        let b = _mm_set_pd(3., 4.);
58560        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58561        let e = _mm_set_pd(1., 11.);
58562        assert_eq_m128d(r, e);
58563        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58564            src, 0b11111111, a, b,
58565        );
58566        let e = _mm_set_pd(1., 6.);
58567        assert_eq_m128d(r, e);
58568    }
58569
58570    #[simd_test(enable = "avx512f")]
58571    unsafe fn test_mm_maskz_add_round_sd() {
58572        let a = _mm_set_pd(1., 2.);
58573        let b = _mm_set_pd(3., 4.);
58574        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58575        let e = _mm_set_pd(1., 0.);
58576        assert_eq_m128d(r, e);
58577        let r =
58578            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58579        let e = _mm_set_pd(1., 6.);
58580        assert_eq_m128d(r, e);
58581    }
58582
58583    #[simd_test(enable = "avx512f")]
58584    unsafe fn test_mm_sub_round_ss() {
58585        let a = _mm_set_ps(1., 2., 10., 20.);
58586        let b = _mm_set_ps(3., 4., 30., 40.);
58587        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58588        let e = _mm_set_ps(1., 2., 10., -20.);
58589        assert_eq_m128(r, e);
58590    }
58591
58592    #[simd_test(enable = "avx512f")]
58593    unsafe fn test_mm_mask_sub_round_ss() {
58594        let src = _mm_set_ps(10., 11., 100., 110.);
58595        let a = _mm_set_ps(1., 2., 10., 20.);
58596        let b = _mm_set_ps(3., 4., 30., 40.);
58597        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58598        let e = _mm_set_ps(1., 2., 10., 110.);
58599        assert_eq_m128(r, e);
58600        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58601            src, 0b11111111, a, b,
58602        );
58603        let e = _mm_set_ps(1., 2., 10., -20.);
58604        assert_eq_m128(r, e);
58605    }
58606
58607    #[simd_test(enable = "avx512f")]
58608    unsafe fn test_mm_maskz_sub_round_ss() {
58609        let a = _mm_set_ps(1., 2., 10., 20.);
58610        let b = _mm_set_ps(3., 4., 30., 40.);
58611        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58612        let e = _mm_set_ps(1., 2., 10., 0.);
58613        assert_eq_m128(r, e);
58614        let r =
58615            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58616        let e = _mm_set_ps(1., 2., 10., -20.);
58617        assert_eq_m128(r, e);
58618    }
58619
58620    #[simd_test(enable = "avx512f")]
58621    unsafe fn test_mm_sub_round_sd() {
58622        let a = _mm_set_pd(1., 2.);
58623        let b = _mm_set_pd(3., 4.);
58624        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58625        let e = _mm_set_pd(1., -2.);
58626        assert_eq_m128d(r, e);
58627    }
58628
58629    #[simd_test(enable = "avx512f")]
58630    unsafe fn test_mm_mask_sub_round_sd() {
58631        let src = _mm_set_pd(10., 11.);
58632        let a = _mm_set_pd(1., 2.);
58633        let b = _mm_set_pd(3., 4.);
58634        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58635        let e = _mm_set_pd(1., 11.);
58636        assert_eq_m128d(r, e);
58637        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58638            src, 0b11111111, a, b,
58639        );
58640        let e = _mm_set_pd(1., -2.);
58641        assert_eq_m128d(r, e);
58642    }
58643
58644    #[simd_test(enable = "avx512f")]
58645    unsafe fn test_mm_maskz_sub_round_sd() {
58646        let a = _mm_set_pd(1., 2.);
58647        let b = _mm_set_pd(3., 4.);
58648        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58649        let e = _mm_set_pd(1., 0.);
58650        assert_eq_m128d(r, e);
58651        let r =
58652            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58653        let e = _mm_set_pd(1., -2.);
58654        assert_eq_m128d(r, e);
58655    }
58656
58657    #[simd_test(enable = "avx512f")]
58658    unsafe fn test_mm_mul_round_ss() {
58659        let a = _mm_set_ps(1., 2., 10., 20.);
58660        let b = _mm_set_ps(3., 4., 30., 40.);
58661        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58662        let e = _mm_set_ps(1., 2., 10., 800.);
58663        assert_eq_m128(r, e);
58664    }
58665
58666    #[simd_test(enable = "avx512f")]
58667    unsafe fn test_mm_mask_mul_round_ss() {
58668        let src = _mm_set_ps(10., 11., 100., 110.);
58669        let a = _mm_set_ps(1., 2., 10., 20.);
58670        let b = _mm_set_ps(3., 4., 30., 40.);
58671        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58672        let e = _mm_set_ps(1., 2., 10., 110.);
58673        assert_eq_m128(r, e);
58674        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58675            src, 0b11111111, a, b,
58676        );
58677        let e = _mm_set_ps(1., 2., 10., 800.);
58678        assert_eq_m128(r, e);
58679    }
58680
58681    #[simd_test(enable = "avx512f")]
58682    unsafe fn test_mm_maskz_mul_round_ss() {
58683        let a = _mm_set_ps(1., 2., 10., 20.);
58684        let b = _mm_set_ps(3., 4., 30., 40.);
58685        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58686        let e = _mm_set_ps(1., 2., 10., 0.);
58687        assert_eq_m128(r, e);
58688        let r =
58689            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58690        let e = _mm_set_ps(1., 2., 10., 800.);
58691        assert_eq_m128(r, e);
58692    }
58693
58694    #[simd_test(enable = "avx512f")]
58695    unsafe fn test_mm_mul_round_sd() {
58696        let a = _mm_set_pd(1., 2.);
58697        let b = _mm_set_pd(3., 4.);
58698        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58699        let e = _mm_set_pd(1., 8.);
58700        assert_eq_m128d(r, e);
58701    }
58702
58703    #[simd_test(enable = "avx512f")]
58704    unsafe fn test_mm_mask_mul_round_sd() {
58705        let src = _mm_set_pd(10., 11.);
58706        let a = _mm_set_pd(1., 2.);
58707        let b = _mm_set_pd(3., 4.);
58708        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58709        let e = _mm_set_pd(1., 11.);
58710        assert_eq_m128d(r, e);
58711        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58712            src, 0b11111111, a, b,
58713        );
58714        let e = _mm_set_pd(1., 8.);
58715        assert_eq_m128d(r, e);
58716    }
58717
58718    #[simd_test(enable = "avx512f")]
58719    unsafe fn test_mm_maskz_mul_round_sd() {
58720        let a = _mm_set_pd(1., 2.);
58721        let b = _mm_set_pd(3., 4.);
58722        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58723        let e = _mm_set_pd(1., 0.);
58724        assert_eq_m128d(r, e);
58725        let r =
58726            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58727        let e = _mm_set_pd(1., 8.);
58728        assert_eq_m128d(r, e);
58729    }
58730
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

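    // The max/min tests pass `_MM_FROUND_CUR_DIRECTION` rather than a
    // rounding mode: max and min produce exact results, so the rounding
    // constant effectively acts as an exception (SAE) control only.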
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

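    // For the sqrt tests only the lowest lane of `b` participates:
    // sqrt(4.) == 2. exactly, so the TO_ZERO rounding mode cannot change the
    // result; the masked variants again fall back to `src` (110.) or zero.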
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

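    // getexp stores floor(log2(|x|)) of the lowest lane of `b` as a float:
    // for b == 3. that exponent is 1.; the upper lanes come from `a` (2.).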
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

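    // getmant normalizes the lowest lane of `b` into the interval chosen by
    // the first const parameter: with `_MM_MANT_NORM_1_2`, 10. == 1.25 * 2^3,
    // so the extracted mantissa is 1.25 (its sign taken from the source via
    // `_MM_MANT_SIGN_SRC`).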
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r =
            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r =
            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

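    // roundscale rounds the lowest lane of `b` to 2^-IMM precision; with
    // IMM == 0 that means rounding to the nearest integer, so 1.1 becomes
    // 1.0.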
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

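    // scalef computes a[0] * 2^floor(b[0]) in the lowest lane:
    // 1. * 2^3 == 8.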
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r =
            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r =
            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

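    // The scalar FMA tests all follow the same pattern on the lowest lane:
    // fmadd computes a*b + c (1.*2. + 3. == 5.) and fmsub computes a*b - c
    // (1.*2. - 3. == -1.). The variants differ only in the fallback lane when
    // the mask bit is clear: `mask` keeps `a`, `maskz` writes zero, and
    // `mask3` keeps `c`.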
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }

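    // The negated forms flip the sign of the product: fnmadd computes
    // -(a*b) + c (-(1.*2.) + 3. == 1.) and fnmsub computes -(a*b) - c
    // (-(1.*2.) - 3. == -5.), with the same mask/maskz/mask3 fallback rules
    // as above.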
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask3_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }

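    // fixupimm rewrites the lowest lane based on a classification of the
    // inputs and the per-class token table supplied in `c`; with these
    // operands the NaN lane is fixed up to -0.0. In the maskz variants a
    // clear mask bit zeroes the lane to +0.0 instead.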
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ss::<5>(a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_sd::<5>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_round_ss() {
        let a = _mm_set_ps(1., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_ps(1., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

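    // The masked scalar conversions widen or narrow only the lowest lane:
    // cvtss_sd converts b's low f32 into a's low f64 slot, cvtsd_ss the
    // reverse, with the usual src/zero fallback when the mask bit is clear.
    // The `_round` forms below add a rounding-control constant, which matters
    // only for the narrowing f64 -> f32 direction (widening is exact).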
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

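    // The scalar integer conversions exercise the different rounding paths:
    // with `_MM_FROUND_TO_ZERO`, -1.5 truncates to -1; the plain cvtss_i32 /
    // cvtsd_i32 forms use the current rounding mode (round-to-nearest-even by
    // default), giving -2; and converting a negative value to u32 is out of
    // range, so the integer-indefinite value u32::MAX is returned. The cvtt
    // (truncating) variants always round toward zero and take only an
    // exception-suppression constant.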
60045    #[simd_test(enable = "avx512f")]
60046    unsafe fn test_mm_cvt_roundss_si32() {
60047        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60048        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60049        let e: i32 = -1;
60050        assert_eq!(r, e);
60051    }
60052
60053    #[simd_test(enable = "avx512f")]
60054    unsafe fn test_mm_cvt_roundss_i32() {
60055        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60056        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60057        let e: i32 = -1;
60058        assert_eq!(r, e);
60059    }
60060
60061    #[simd_test(enable = "avx512f")]
60062    unsafe fn test_mm_cvt_roundss_u32() {
60063        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60064        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60065        let e: u32 = u32::MAX;
60066        assert_eq!(r, e);
60067    }
60068
60069    #[simd_test(enable = "avx512f")]
60070    unsafe fn test_mm_cvtss_i32() {
60071        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60072        let r = _mm_cvtss_i32(a);
60073        let e: i32 = -2;
60074        assert_eq!(r, e);
60075    }
60076
60077    #[simd_test(enable = "avx512f")]
60078    unsafe fn test_mm_cvtss_u32() {
60079        let a = _mm_set_ps(0., -0.5, 1., -1.5);
60080        let r = _mm_cvtss_u32(a);
60081        let e: u32 = u32::MAX;
60082        assert_eq!(r, e);
60083    }
60084
60085    #[simd_test(enable = "avx512f")]
60086    unsafe fn test_mm_cvt_roundsd_si32() {
60087        let a = _mm_set_pd(1., -1.5);
60088        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60089        let e: i32 = -1;
60090        assert_eq!(r, e);
60091    }
60092
60093    #[simd_test(enable = "avx512f")]
60094    unsafe fn test_mm_cvt_roundsd_i32() {
60095        let a = _mm_set_pd(1., -1.5);
60096        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60097        let e: i32 = -1;
60098        assert_eq!(r, e);
60099    }
60100
60101    #[simd_test(enable = "avx512f")]
60102    unsafe fn test_mm_cvt_roundsd_u32() {
60103        let a = _mm_set_pd(1., -1.5);
60104        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60105        let e: u32 = u32::MAX;
60106        assert_eq!(r, e);
60107    }
60108
60109    #[simd_test(enable = "avx512f")]
60110    unsafe fn test_mm_cvtsd_i32() {
60111        let a = _mm_set_pd(1., -1.5);
60112        let r = _mm_cvtsd_i32(a);
60113        let e: i32 = -2;
60114        assert_eq!(r, e);
60115    }
60116
60117    #[simd_test(enable = "avx512f")]
60118    unsafe fn test_mm_cvtsd_u32() {
60119        let a = _mm_set_pd(1., -1.5);
60120        let r = _mm_cvtsd_u32(a);
60121        let e: u32 = u32::MAX;
60122        assert_eq!(r, e);
60123    }
60124
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

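    // The `cvtt` (truncating) conversions below always round toward zero
    // regardless of MXCSR; their rounding parameter only controls exception
    // suppression, so `_MM_FROUND_NO_EXC` is the sole meaningful flag here.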
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

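    // For `_mm_comi_round_ss`/`_sd`, the first const generic is the comparison
    // predicate (0 is `_CMP_EQ_OQ`) and the return value is 1 if the predicate
    // holds, otherwise 0; 2.2 == 1.1 is false, hence the expected 0.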
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }

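    // For `_mm512_shuffle_pd`, immediate bit 2*i selects the low or high element
    // of the i-th 128-bit pair of `a`, and bit 2*i+1 does the same for `b`; with
    // all bits set, the result interleaves the high elements of each pair.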
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

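    // The expand-load tests below all follow the same pattern: consecutive
    // elements are read from memory and placed, in order, into the destination
    // lanes whose mask bit is set (counting from the least significant bit);
    // lanes with a clear bit keep `src` (mask variants) or become zero (maskz
    // variants).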
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

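    // Only the low two bits of the mask apply to a 128-bit vector of i64, and
    // both are clear in 0b11101000, so nothing is loaded: the mask variant
    // keeps `src` in every lane and the maskz variant returns all zeros.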
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}