From e10da06e01e99fffeef1bf49138394682d94cc7d Mon Sep 17 00:00:00 2001 From: Aelin Reidel Date: Sun, 29 Mar 2026 04:09:44 +0200 Subject: [PATCH] Add LSX and LASX implementations --- Cargo.toml | 1 + README.md | 6 + bench/Cargo.toml | 2 +- src/basic.rs | 24 +++ src/compat.rs | 20 ++ src/implementation/loongarch64/lasx.rs | 252 +++++++++++++++++++++++++ src/implementation/loongarch64/lsx.rs | 244 ++++++++++++++++++++++++ src/implementation/loongarch64/mod.rs | 224 ++++++++++++++++++++++ src/implementation/mod.rs | 23 +++ src/lib.rs | 4 + 10 files changed, 799 insertions(+), 1 deletion(-) create mode 100644 src/implementation/loongarch64/lasx.rs create mode 100644 src/implementation/loongarch64/lsx.rs create mode 100644 src/implementation/loongarch64/mod.rs diff --git a/Cargo.toml b/Cargo.toml index eccd5c1a..dbfc8550 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ aarch64_neon = [] aarch64_neon_prefetch = [] armv7_neon = [] +loongarch64_lsx = [] # make the portable SIMD public implementation available (experimental, nightly only) portable_public_imp = ["public_imp"] diff --git a/README.md b/README.md index bbf40f17..fd89c029 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ This library has been thoroughly tested with sample data as well as fuzzing and * ARM64 (aarch64) SIMD is supported since Rust 1.61 * WASM (wasm32) SIMD is supported * 🆕 armv7 NEON support with the `armv7_neon` feature on nightly Rust +* 🆕 loongarch64 LSX/LASX support with the `loongarch64_lsx` feature on nightly Rust * x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCII * aarch64: Up to eleven times faster than the std library on valid non-ASCII, up to four times faster on ASCII (Apple Silicon) * Faster than the original simdjson implementation @@ -92,6 +93,11 @@ runtime using the `std::arch::is_arm_feature_detected!` macro unless the CPU tar `RUSTFLAGS="-C target-feature=+neon"`. 
Some targets such as `thumbv7neon-linux-androideabi` and `thumbv7neon-unknown-linux-gnueabihf` have NEON enabled by default. +### LoongArch64 +Requires a recent nightly Rust compiler. The `loongarch64_lsx` feature needs to be enabled. +The fastest implementation is usually selected at runtime. Compiling with the `lsx` or `lasx` target features enabled +selects the LSX/LASX implementations at compile time. + ### WASM32 For wasm32 support, the implementation is selected at compile time based on the presence of the `simd128` target feature. Use `RUSTFLAGS="-C target-feature=+simd128"` to enable the WASM SIMD implementation. WASM, at diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 42333429..b38bade5 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -29,7 +29,7 @@ simdutf8_wasmtime = ["wasmtime"] [dependencies] core_affinity = "0.8.1" criterion = "0.8.1" -simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] } +simdutf8 = { version = "*", path = "..", features = ["aarch64_neon", "loongarch64_lsx"] } simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true } # default is cranelift which is not as performant as the llvm backend wasmer = { version = "2.1", optional = true, default-features = false } diff --git a/src/basic.rs b/src/basic.rs index 02623d8b..89aa8de0 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -260,6 +260,30 @@ pub mod imp { } } + /// Includes the loongarch64 SIMD implementations. + #[cfg(all(feature = "loongarch64_lsx", target_arch = "loongarch64"))] + pub mod loongarch64 { + /// Includes the LASX-based validation implementation for loongarch64 CPUs. + /// + /// Using the provided functionality on CPUs which do not support LASX is undefined + /// behavior and will very likely cause a crash. 
+ pub mod lasx { + pub use crate::implementation::loongarch64::lasx::validate_utf8_basic as validate_utf8; + pub use crate::implementation::loongarch64::lasx::ChunkedUtf8ValidatorImp; + pub use crate::implementation::loongarch64::lasx::Utf8ValidatorImp; + } + + /// Includes the LSX-based validation implementation for loongarch64 CPUs. + /// + /// Using the provided functionality on CPUs which do not support LSX is undefined + /// behavior and will very likely cause a crash. + pub mod lsx { + pub use crate::implementation::loongarch64::lsx::validate_utf8_basic as validate_utf8; + pub use crate::implementation::loongarch64::lsx::ChunkedUtf8ValidatorImp; + pub use crate::implementation::loongarch64::lsx::Utf8ValidatorImp; + } + } + /// Includes the wasm32 SIMD implementations. #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] pub mod wasm32 { diff --git a/src/compat.rs b/src/compat.rs index d9482fc2..7d226ae1 100644 --- a/src/compat.rs +++ b/src/compat.rs @@ -146,6 +146,26 @@ pub mod imp { } } + /// Includes the loongarch64 SIMD implementations. + #[cfg(all(feature = "loongarch64_lsx", target_arch = "loongarch64"))] + pub mod loongarch64 { + /// Includes the LASX-based validation implementation for loongarch64 LASX-compatible CPUs. + /// + /// Using the provided functionality on CPUs which do not support LASX is undefined + /// behavior and will very likely cause a crash. + pub mod lasx { + pub use crate::implementation::loongarch64::lasx::validate_utf8_compat as validate_utf8; + } + + /// Includes the LSX-based validation implementation for loongarch64 LSX-compatible CPUs. + /// + /// Using the provided functionality on CPUs which do not support LSX is undefined + /// behavior and will very likely cause a crash. + pub mod lsx { + pub use crate::implementation::loongarch64::lsx::validate_utf8_compat as validate_utf8; + } + } + /// Includes the wasm32 SIMD implementations. 
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] pub mod wasm32 { diff --git a/src/implementation/loongarch64/lasx.rs b/src/implementation/loongarch64/lasx.rs new file mode 100644 index 00000000..220c93ad --- /dev/null +++ b/src/implementation/loongarch64/lasx.rs @@ -0,0 +1,252 @@ +//! Contains the loongarch64 LASX UTF-8 validation implementation. + +#[cfg(target_arch = "loongarch64")] +use core::arch::loongarch64::{ + lasx_xvand_v, lasx_xvld, lasx_xvldi, lasx_xvmskltz_b, lasx_xvmsknz_b, lasx_xvor_v, + lasx_xvpermi_q, lasx_xvpickve2gr_d, lasx_xvreplgr2vr_b, lasx_xvshuf_b, lasx_xvsrli_b, + lasx_xvssub_bu, lasx_xvxor_v, m256i, +}; + +use crate::implementation::helpers::Utf8CheckAlgorithm; + +// LASX SIMD primitives + +type SimdU8Value = crate::implementation::helpers::SimdU8Value; + +impl SimdU8Value { + #[flexpect::e(clippy::too_many_arguments)] + #[flexpect::e(clippy::cast_possible_wrap)] + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + let arr: [u8; 32] = [ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]; + Self::from(lasx_xvld::<0>(arr.as_ptr().cast())) + } + + #[flexpect::e(clippy::too_many_arguments)] + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_32_cut_off_leading( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, 
v0, v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + } + + #[flexpect::e(clippy::cast_ptr_alignment)] + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn load_from(ptr: *const u8) -> Self { + Self::from(lasx_xvld::<0>(ptr.cast())) + } + + #[flexpect::e(clippy::too_many_arguments)] + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + let src = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + .0; + + Self::from(lasx_xvshuf_b(src, src, self.0)) + } + + #[flexpect::e(clippy::cast_possible_wrap)] + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn splat(val: u8) -> Self { + Self::from(lasx_xvreplgr2vr_b(val as i32)) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn splat0() -> Self { + Self::from(lasx_xvldi::<0>()) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn or(self, b: Self) -> Self { + Self::from(lasx_xvor_v(self.0, b.0)) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn and(self, b: Self) -> Self { + Self::from(lasx_xvand_v(self.0, b.0)) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn xor(self, b: Self) -> Self { + Self::from(lasx_xvxor_v(self.0, b.0)) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn saturating_sub(self, b: Self) -> Self { + Self::from(lasx_xvssub_bu(self.0, b.0)) + } + + // ugly but shr requires const generics + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn shr4(self) -> Self { + Self::from(lasx_xvsrli_b::<4>(self.0)) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn prev1(self, prev: Self) -> Self { + // This lets us end up with [ prev_hi | self_lo ] + let bridge = lasx_xvpermi_q(self.0, 
prev.0, 0x21); + // It shuffles [ b_lo | a_lo ] | [ b_hi | a_hi ] + // ...aka [ bridge_lo | self_lo ] | [ bridge_hi | self_hi ] + // ...aka [ prev_hi | self_lo ] | [ self_lo | self_hi ] + let mask = [ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + ]; + Self::from(lasx_xvshuf_b(self.0, bridge, lasx_xvld::<0>(mask.as_ptr()))) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn prev2(self, prev: Self) -> Self { + let bridge = lasx_xvpermi_q(self.0, prev.0, 0x21); + let mask = [ + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + ]; + Self::from(lasx_xvshuf_b(self.0, bridge, lasx_xvld::<0>(mask.as_ptr()))) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn prev3(self, prev: Self) -> Self { + let bridge = lasx_xvpermi_q(self.0, prev.0, 0x21); + let mask = [ + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + ]; + Self::from(lasx_xvshuf_b(self.0, bridge, lasx_xvld::<0>(mask.as_ptr()))) + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn any_bit_set(self) -> bool { + let nonzero_mask = lasx_xvmsknz_b(self.0); + let lo = lasx_xvpickve2gr_d::<0>(nonzero_mask); + let hi = lasx_xvpickve2gr_d::<2>(nonzero_mask); + lo != 0 || hi != 0 + } + + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn is_ascii(self) -> bool { + let high_bits = lasx_xvmskltz_b(self.0); + let lo = lasx_xvpickve2gr_d::<0>(high_bits); + let hi = lasx_xvpickve2gr_d::<2>(high_bits); + (lo | hi) == 0 + } +} + +impl From for SimdU8Value { + #[inline] + fn from(val: m256i) -> Self { + Self(val) + } +} + +impl Utf8CheckAlgorithm { + #[target_feature(enable = "lasx")] + #[inline] + unsafe fn must_be_2_3_continuation(prev2: 
SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { + let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80)); + let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80)); + is_third_byte.or(is_fourth_byte) + } +} + +#[inline] +unsafe fn simd_prefetch(_ptr: *const u8) {} + +const PREFETCH: bool = false; +use crate::implementation::helpers::TempSimdChunkA32 as TempSimdChunk; +simd_input_256_bit!(#[target_feature(enable = "lasx")]); +algorithm_simd!(#[target_feature(enable = "lasx")]); diff --git a/src/implementation/loongarch64/lsx.rs b/src/implementation/loongarch64/lsx.rs new file mode 100644 index 00000000..f3ce5089 --- /dev/null +++ b/src/implementation/loongarch64/lsx.rs @@ -0,0 +1,244 @@ +//! Contains the loongarch64 LSX UTF-8 validation implementation. + +#[cfg(target_arch = "loongarch64")] +use core::arch::loongarch64::{ + lsx_vand_v, lsx_vld, lsx_vldi, lsx_vmskltz_b, lsx_vmsknz_b, lsx_vor_v, lsx_vpickve2gr_w, + lsx_vreplgr2vr_b, lsx_vshuf_b, lsx_vsrli_b, lsx_vssub_bu, lsx_vxor_v, m128i, +}; + +use crate::implementation::helpers::Utf8CheckAlgorithm; + +// LSX SIMD primitives + +type SimdU8Value = crate::implementation::helpers::SimdU8Value; + +impl SimdU8Value { + #[flexpect::e(clippy::too_many_arguments)] + #[flexpect::e(clippy::cast_possible_wrap)] + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn from_32_cut_off_leading( + _v0: u8, + _v1: u8, + _v2: u8, + _v3: u8, + _v4: u8, + _v5: u8, + _v6: u8, + _v7: u8, + _v8: u8, + _v9: u8, + _v10: u8, + _v11: u8, + _v12: u8, + _v13: u8, + _v14: u8, + _v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + let arr: [u8; 16] = [ + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]; + Self::from(lsx_vld::<0>(arr.as_ptr().cast())) + } + + #[flexpect::e(clippy::too_many_arguments)] + 
#[flexpect::e(clippy::cast_possible_wrap)] + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + let arr: [u8; 16] = [ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ]; + Self::from(lsx_vld::<0>(arr.as_ptr().cast())) + } + + #[flexpect::e(clippy::cast_ptr_alignment)] + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn load_from(ptr: *const u8) -> Self { + Self::from(lsx_vld::<0>(ptr.cast())) + } + + #[flexpect::e(clippy::too_many_arguments)] + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + let src = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + .0; + + Self::from(lsx_vshuf_b(src, src, self.0)) + } + + #[flexpect::e(clippy::cast_possible_wrap)] + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn splat(val: u8) -> Self { + Self::from(lsx_vreplgr2vr_b(val as i32)) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn splat0() -> Self { + Self::from(lsx_vldi::<0>()) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn or(self, b: Self) -> Self { + Self::from(lsx_vor_v(self.0, b.0)) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn and(self, b: Self) -> Self { + Self::from(lsx_vand_v(self.0, b.0)) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn xor(self, b: Self) -> Self { + Self::from(lsx_vxor_v(self.0, b.0)) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn saturating_sub(self, b: Self) -> Self { + Self::from(lsx_vssub_bu(self.0, b.0)) + } + + // ugly but shr requires const generics + 
#[target_feature(enable = "lsx")] + #[inline] + unsafe fn shr4(self) -> Self { + Self::from(lsx_vsrli_b::<4>(self.0)) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn prev1(self, prev: Self) -> Self { + let ctrl_arr: [u8; 16] = [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]; + + Self::from(lsx_vshuf_b( + prev.0, + self.0, + lsx_vld::<0>(ctrl_arr.as_ptr().cast()), + )) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn prev2(self, prev: Self) -> Self { + let ctrl_arr: [u8; 16] = [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; + + Self::from(lsx_vshuf_b( + prev.0, + self.0, + lsx_vld::<0>(ctrl_arr.as_ptr().cast()), + )) + } + + // ugly but prev requires const generics + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn prev3(self, prev: Self) -> Self { + let ctrl_arr: [u8; 16] = [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + + Self::from(lsx_vshuf_b( + prev.0, + self.0, + lsx_vld::<0>(ctrl_arr.as_ptr().cast()), + )) + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn any_bit_set(self) -> bool { + lsx_vpickve2gr_w::<0>(lsx_vmsknz_b(self.0)) != 0 + } + + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn is_ascii(self) -> bool { + lsx_vpickve2gr_w::<0>(lsx_vmskltz_b(self.0)) == 0 + } +} + +impl From for SimdU8Value { + #[inline] + fn from(val: m128i) -> Self { + Self(val) + } +} + +impl Utf8CheckAlgorithm { + #[target_feature(enable = "lsx")] + #[inline] + unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { + let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80)); + let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80)); + is_third_byte.or(is_fourth_byte) + } +} + +#[inline] +unsafe fn simd_prefetch(_ptr: *const u8) {} + +const PREFETCH: bool = false; +use crate::implementation::helpers::TempSimdChunkA16 as 
TempSimdChunk; +simd_input_128_bit!(#[target_feature(enable = "lsx")]); +algorithm_simd!(#[target_feature(enable = "lsx")]); diff --git a/src/implementation/loongarch64/mod.rs b/src/implementation/loongarch64/mod.rs new file mode 100644 index 00000000..7216f729 --- /dev/null +++ b/src/implementation/loongarch64/mod.rs @@ -0,0 +1,224 @@ +#[cfg(all( + feature = "loongarch64_lsx", + any( + feature = "public_imp", + // std: lsx is available for auto-selection unless lasx is selected at compile-time + all(feature = "std", not(target_feature = "lasx")), + // no-std: no lasx -> select lsx + all(not(feature = "std"), not(target_feature = "lasx"), target_feature = "lsx") + ) +))] +pub(crate) mod lsx; + +#[cfg(all( + feature = "loongarch64_lsx", + any( + feature = "public_imp", + // always available, except if no-std and no lasx support + feature = "std", + target_feature = "lasx" + ) +))] +pub(crate) mod lasx; + +// validate_utf8_basic() std: implementation auto-selection + +#[cfg(all( + feature = "loongarch64_lsx", + feature = "std", + not(target_feature = "lasx") +))] +#[inline] +pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + use super::helpers::SIMD_CHUNK_SIZE; + use core::mem; + use std::sync::atomic::{AtomicPtr, Ordering}; + + type FnRaw = *mut (); + type ValidateUtf8Fn = unsafe fn(input: &[u8]) -> Result<(), crate::basic::Utf8Error>; + + #[flexpect::e(clippy::option_if_let_else)] + #[inline] + fn get_fastest_available_implementation_basic() -> ValidateUtf8Fn { + if std::arch::is_loongarch_feature_detected!("lasx") { + lasx::validate_utf8_basic + } else if std::arch::is_loongarch_feature_detected!("lsx") { + lsx::validate_utf8_basic + } else { + super::validate_utf8_basic_fallback + } + } + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + unsafe fn get_fastest(input: &[u8]) -> core::result::Result<(), crate::basic::Utf8Error> { + let fun = get_fastest_available_implementation_basic(); + 
FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input) + } + + if input.len() < SIMD_CHUNK_SIZE { + return super::validate_utf8_basic_fallback(input); + } + + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input) +} + +// validate_utf8_basic() no-std: implementation selection by config + +#[cfg(all(feature = "loongarch64_lsx", target_feature = "lasx"))] +pub(crate) unsafe fn validate_utf8_basic( + input: &[u8], +) -> core::result::Result<(), crate::basic::Utf8Error> { + validate_utf8_basic_lasx(input) +} + +#[cfg(all(feature = "loongarch64_lsx", target_feature = "lasx"))] +#[inline(never)] +unsafe fn validate_utf8_basic_lasx( + input: &[u8], +) -> core::result::Result<(), crate::basic::Utf8Error> { + lasx::validate_utf8_basic(input) +} + +#[cfg(all( + feature = "loongarch64_lsx", + not(feature = "std"), + not(target_feature = "lasx"), + target_feature = "lsx" +))] +#[inline] +pub(crate) unsafe fn validate_utf8_basic( + input: &[u8], +) -> core::result::Result<(), crate::basic::Utf8Error> { + if input.len() < super::helpers::SIMD_CHUNK_SIZE { + return super::validate_utf8_basic_fallback(input); + } + + validate_utf8_basic_lsx(input) +} + +#[cfg(all( + feature = "loongarch64_lsx", + not(feature = "std"), + not(target_feature = "lasx"), + target_feature = "lsx" +))] +#[inline] +pub(crate) unsafe fn validate_utf8_basic_lsx( + input: &[u8], +) -> core::result::Result<(), crate::basic::Utf8Error> { + lsx::validate_utf8_basic(input) +} + +#[cfg(any( + not(feature = "loongarch64_lsx"), + all( + not(feature = "std"), + not(target_feature = "lasx"), + not(target_feature = "lsx"), + ) +))] +pub(crate) use super::validate_utf8_basic_fallback as validate_utf8_basic; + +// validate_utf8_compat() std: implementation auto-selection + +#[cfg(all( + feature = "loongarch64_lsx", + feature = "std", + not(target_feature = "lasx") +))] +#[inline] +pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + use 
super::helpers::SIMD_CHUNK_SIZE; + use core::mem; + use std::sync::atomic::{AtomicPtr, Ordering}; + + type FnRaw = *mut (); + type ValidateUtf8CompatFn = unsafe fn(input: &[u8]) -> Result<(), crate::compat::Utf8Error>; + + #[flexpect::e(clippy::option_if_let_else)] + #[inline] + fn get_fastest_available_implementation_compat() -> ValidateUtf8CompatFn { + if std::arch::is_loongarch_feature_detected!("lasx") { + lasx::validate_utf8_compat + } else if std::arch::is_loongarch_feature_detected!("lsx") { + lsx::validate_utf8_compat + } else { + super::validate_utf8_compat_fallback + } + } + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + unsafe fn get_fastest(input: &[u8]) -> core::result::Result<(), crate::compat::Utf8Error> { + let fun = get_fastest_available_implementation_compat(); + FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input) + } + + if input.len() < SIMD_CHUNK_SIZE { + return super::validate_utf8_compat_fallback(input); + } + + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input) +} + +// validate_utf8_compat() no-std: implementation selection by config + +#[cfg(all(feature = "loongarch64_lsx", target_feature = "lasx"))] +pub(crate) unsafe fn validate_utf8_compat( + input: &[u8], +) -> core::result::Result<(), crate::compat::Utf8Error> { + validate_utf8_compat_lasx(input) +} + +#[cfg(all(feature = "loongarch64_lsx", target_feature = "lasx"))] +#[inline(never)] +unsafe fn validate_utf8_compat_lasx( + input: &[u8], +) -> core::result::Result<(), crate::compat::Utf8Error> { + lasx::validate_utf8_compat(input) +} + +#[cfg(all( + feature = "loongarch64_lsx", + not(feature = "std"), + not(target_feature = "lasx"), + target_feature = "lsx" +))] +#[inline] +pub(crate) unsafe fn validate_utf8_compat( + input: &[u8], +) -> core::result::Result<(), crate::compat::Utf8Error> { + if input.len() < super::helpers::SIMD_CHUNK_SIZE { + return super::validate_utf8_compat_fallback(input); + } + + validate_utf8_compat_lsx(input) +} 
+ +#[cfg(all( + feature = "loongarch64_lsx", + not(feature = "std"), + not(target_feature = "lasx"), + target_feature = "lsx" +))] +#[inline] +pub(crate) unsafe fn validate_utf8_compat_lsx( + input: &[u8], +) -> core::result::Result<(), crate::compat::Utf8Error> { + lsx::validate_utf8_compat(input) +} + +#[cfg(any( + not(feature = "loongarch64_lsx"), + all( + not(feature = "std"), + not(target_feature = "lasx"), + not(target_feature = "lsx"), + ) +))] +pub(crate) use super::validate_utf8_compat_fallback as validate_utf8_compat; diff --git a/src/implementation/mod.rs b/src/implementation/mod.rs index 589b87c9..3c7fb7ba 100644 --- a/src/implementation/mod.rs +++ b/src/implementation/mod.rs @@ -61,6 +61,27 @@ pub(super) use aarch64::validate_utf8_basic; #[cfg(target_arch = "aarch64")] pub(super) use aarch64::validate_utf8_compat; +// loongarch64 implementation + +#[cfg(target_arch = "loongarch64")] +pub(crate) mod loongarch64; + +/// Fn needed instead of re-import, otherwise not inlined in non-std case +#[flexpect::e(clippy::inline_always)] +#[inline(always)] +#[cfg(target_arch = "loongarch64")] +pub(super) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + loongarch64::validate_utf8_basic(input) +} + +/// Fn needed instead of re-import, otherwise not inlined in non-std case +#[flexpect::e(clippy::inline_always)] +#[inline(always)] +#[cfg(target_arch = "loongarch64")] +pub(super) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + loongarch64::validate_utf8_compat(input) +} + // wasm32 implementation #[cfg(target_arch = "wasm32")] @@ -79,6 +100,7 @@ pub(super) use wasm32::validate_utf8_compat; target_arch = "x86_64", target_arch = "aarch64", all(target_arch = "arm", target_feature = "v7", target_endian = "little"), + target_arch = "loongarch64", target_arch = "wasm32" )))] pub(super) use validate_utf8_basic_fallback as validate_utf8_basic; @@ -88,6 +110,7 @@ pub(super) use 
validate_utf8_basic_fallback as validate_utf8_basic; target_arch = "x86_64", target_arch = "aarch64", all(target_arch = "arm", target_feature = "v7", target_endian = "little"), + target_arch = "loongarch64", target_arch = "wasm32" )))] pub(super) use validate_utf8_compat_fallback as validate_utf8_compat; diff --git a/src/lib.rs b/src/lib.rs index ab0a97d6..32a4c927 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,10 @@ feature(stdarch_aarch64_prefetch) )] #![cfg_attr(feature = "portable_public_imp", feature(portable_simd))] +#![cfg_attr( + all(target_arch = "loongarch64", feature = "loongarch64_lsx"), + feature(stdarch_loongarch) +)] #![cfg_attr( all( target_arch = "arm",