Skip to content

Commit 00a52de

Browse files
Copilot and nihui committed
feat: extend x86 isa detection
Co-authored-by: nihui <[email protected]>
1 parent 3dca2af commit 00a52de

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ int main()
6262

6363
|CPU|ISA|
6464
|:---:|---|
65-
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `rdrand` `rdseed` `tsx`|
65+
|x86|`mmx` `sse` `sse2` `sse3` `ssse3` `sse41` `sse42` `sse4a` `xop` `avx` `f16c` `fma` `fma4` `avx2` `avx512f` `avx512bw` `avx512cd` `avx512dq` `avx512vl` `avx512vnni` `avx512bf16` `avx512ifma` `avx512vbmi` `avx512vbmi2` `avx512fp16` `avx512bitalg` `avx512vpopcntdq` `avx512vp2intersect` `vpclmulqdq` `avx512er` `avx5124fmaps` `avx5124vnniw` `avxvnni` `avxvnniint8` `avxvnniint16` `avxifma` `avxneconvert` `amxfp16` `amxbf16` `amxint8` `amxtile` `bmi1` `bmi2` `adx` `lzcnt` `tbm` `gfni` `aesni` `vaes` `sha1` `sha256` `sha512` `sm3` `sm4` `clzero` `rdpru` `rdrand` `rdseed` `tsx`|
6666
|arm|`half` `edsp` `neon` `vfpv4` `idiv`|
6767
|aarch64|`neon` `vfpv4` `lse` `cpuid` `asimdrdm` `asimdhp` `asimddp` `asimdfhm` `bf16` `i8mm` `frint` `jscvt` `fcma` `mte` `mte2` `sve` `sve2` `sve2p1` `svebf16` `svei8mm` `svef32mm` `svef64mm` `sme` `smef16f16` `smef64f64` `smei64i64` `pmull` `crc32` `aes` `sha1` `sha2` `sha3` `sha512` `sm3` `sm4` `svepmull` `svebitperm` `sveaes` `svesha3` `svesm4` `amx` `paca` `pacg`|
6868
|mips|`msa` `mmi` `sx` `asx` `msa2` `crypto`|

main.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ int main()
4141
PRINT_ISA_SUPPORT(avx512vbmi)
4242
PRINT_ISA_SUPPORT(avx512vbmi2)
4343
PRINT_ISA_SUPPORT(avx512fp16)
44+
PRINT_ISA_SUPPORT(avx512bitalg)
45+
PRINT_ISA_SUPPORT(avx512vpopcntdq)
46+
PRINT_ISA_SUPPORT(avx512vp2intersect)
47+
PRINT_ISA_SUPPORT(vpclmulqdq)
4448
PRINT_ISA_SUPPORT(avx512er)
4549
PRINT_ISA_SUPPORT(avx5124fmaps)
4650
PRINT_ISA_SUPPORT(avx5124vnniw)
@@ -66,6 +70,11 @@ int main()
6670
PRINT_ISA_SUPPORT(rdrand)
6771
PRINT_ISA_SUPPORT(rdseed)
6872
PRINT_ISA_SUPPORT(tsx)
73+
PRINT_ISA_SUPPORT(adx)
74+
PRINT_ISA_SUPPORT(lzcnt)
75+
PRINT_ISA_SUPPORT(tbm)
76+
PRINT_ISA_SUPPORT(clzero)
77+
PRINT_ISA_SUPPORT(rdpru)
6978

7079
#elif __aarch64__ || defined(_M_ARM64)
7180
PRINT_ISA_SUPPORT(neon)

ruapu.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,10 @@ RUAPU_INSTCODE(avx512ifma, 0x62, 0xf2, 0xfd, 0x48, 0xb4, 0xc0) // vpmadd52luq zm
247247
RUAPU_INSTCODE(avx512vbmi, 0x62, 0xf2, 0x7d, 0x48, 0x75, 0xc0) // vpermi2b zmm0,zmm0,zmm0
248248
RUAPU_INSTCODE(avx512vbmi2, 0x62, 0xf2, 0x7d, 0x48, 0x71, 0xc0) // vpshldvd zmm0,zmm0,zmm0
249249
RUAPU_INSTCODE(avx512fp16, 0x62, 0xf6, 0x7d, 0x48, 0x98, 0xc0) // vfmadd132ph zmm0,zmm0,zmm0
250+
RUAPU_INSTCODE(avx512bitalg, 0x62, 0xf2, 0x7d, 0x48, 0x54, 0xc0) // vpopcntb zmm0,zmm0
251+
RUAPU_INSTCODE(avx512vpopcntdq, 0x62, 0xf2, 0x7d, 0x48, 0x55, 0xc0) // vpopcntd zmm0,zmm0
252+
RUAPU_INSTCODE(avx512vp2intersect, 0x62, 0xf2, 0x7f, 0x48, 0x68, 0xc8) // vp2intersectd k1,zmm0,zmm0
253+
// VPCLMULQDQ is its own CPUID feature. Probe it with the VEX.256 encoding, which only needs AVX;
// the EVEX.512 (zmm) encoding additionally requires AVX512F and would report "unsupported" on
// CPUs that implement VPCLMULQDQ without AVX-512.
RUAPU_INSTCODE(vpclmulqdq, 0xc4, 0xe3, 0x7d, 0x44, 0xc0, 0x00) // vpclmulqdq ymm0,ymm0,ymm0,0x0
250254
// TODO:avx512pf, vgatherpf1dps DWORD PTR [esp+zmm0*1]{k1}
251255
RUAPU_INSTCODE(avx512er, 0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xc0) //vexp2pd zmm0,zmm0
252256
RUAPU_INSTCODE(avx5124fmaps, 0x67, 0x62, 0xf2, 0x7f, 0x48, 0x9a, 0x04, 0x24) //v4fmaddps zmm0,zmm0,XMMWORD PTR [esp]
@@ -273,6 +277,11 @@ RUAPU_INSTCODE(sm4, 0xc4, 0xe2, 0x7e, 0xda, 0xc0) // vsm4key4 ymm0,ymm0,ymm0
273277
RUAPU_INSTCODE(rdrand, 0x0f, 0xc7, 0xf0) // rdrand eax
274278
RUAPU_INSTCODE(rdseed, 0x0f, 0xc7, 0xf8) // rdseed eax
275279
RUAPU_INSTCODE(tsx, 0x0f, 0x01, 0xd6) // xtest
280+
RUAPU_INSTCODE(adx, 0x66, 0x0f, 0x38, 0xf6, 0xc0) // adcx eax,eax
281+
// NOTE(review): per the Intel SDM, F3 0F BD is executed as `bsr` (the F3 prefix is ignored) on
// processors without LZCNT, so this byte sequence does not fault there. If detection relies on
// the probe raising an invalid-opcode exception, lzcnt would always be reported as supported —
// confirm, and consider a CPUID-based or result-based check instead.
RUAPU_INSTCODE(lzcnt, 0xf3, 0x0f, 0xbd, 0xc0) // lzcnt eax,eax
282+
RUAPU_INSTCODE(tbm, 0x8f, 0xe9, 0x78, 0x01, 0xc8) // blcfill eax,eax
283+
// Loads the current stack pointer into rAX, then executes clzero (0f 01 fc) on that address.
// NOTE(review): clzero writes zeros to the entire cache line containing the address in rAX, i.e.
// the stack memory around [rsp] — confirm nothing live (return address, saved registers) sits in
// that line when the probe runs.
// NOTE(review): 0x48 is a REX.W prefix only in 64-bit mode; in 32-bit mode it decodes as
// `dec eax` followed by `lea eax,[esp]` — presumably still a usable address, verify.
RUAPU_INSTCODE(clzero, 0x48, 0x8d, 0x04, 0x24, 0x0f, 0x01, 0xfc) // lea rax,[rsp] + clzero
284+
// rdpru reads the processor register selected by ECX into EDX:EAX.
// NOTE(review): ECX is not set up by this byte sequence, so the selected register is whatever the
// caller left there — presumably harmless for a pure support probe, confirm.
RUAPU_INSTCODE(rdpru, 0x0f, 0x01, 0xfd) // rdpru
276285

277286
#elif __aarch64__ || defined(_M_ARM64)
278287
RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s
@@ -494,6 +503,10 @@ RUAPU_ISAENTRY(avx512ifma)
494503
RUAPU_ISAENTRY(avx512vbmi)
495504
RUAPU_ISAENTRY(avx512vbmi2)
496505
RUAPU_ISAENTRY(avx512fp16)
506+
RUAPU_ISAENTRY(avx512bitalg)
507+
RUAPU_ISAENTRY(avx512vpopcntdq)
508+
RUAPU_ISAENTRY(avx512vp2intersect)
509+
RUAPU_ISAENTRY(vpclmulqdq)
497510
// TODO:avx512pf
498511
RUAPU_ISAENTRY(avx512er)
499512
RUAPU_ISAENTRY(avx5124fmaps)
@@ -520,6 +533,11 @@ RUAPU_ISAENTRY(sm4)
520533
RUAPU_ISAENTRY(rdrand)
521534
RUAPU_ISAENTRY(rdseed)
522535
RUAPU_ISAENTRY(tsx)
536+
RUAPU_ISAENTRY(adx)
537+
RUAPU_ISAENTRY(lzcnt)
538+
RUAPU_ISAENTRY(tbm)
539+
RUAPU_ISAENTRY(clzero)
540+
RUAPU_ISAENTRY(rdpru)
523541

524542
#elif __aarch64__ || defined(_M_ARM64)
525543
RUAPU_ISAENTRY(neon)

0 commit comments

Comments
 (0)