use super::fallback;
// We only use AVX when we can detect at runtime whether it's available, which
// requires std.
#[cfg(feature = "std")]
mod avx;
mod sse2;
/// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
/// `memchr` (for example), CPU feature detection will be performed at runtime
/// to determine the best implementation to use. After CPU feature detection
/// is done, we replace `memchr`'s function pointer with the selection. Upon
/// subsequent invocations, the CPU-specific routine is invoked directly, which
/// skips the CPU feature detection and subsequent branch that's required.
///
/// While this typically doesn't matter for rare occurrences or when used on
/// larger haystacks, `memchr` can be called in tight loops where the overhead
/// of this branch can actually add up *and is measurable*. This trick was
/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
/// benchmarks, for example.
///
/// At some point, I expect the Rust ecosystem will get a nice macro for doing
/// exactly this, at which point, we can replace our hand-jammed version of it.
///
/// N.B. The ifunc strategy does prevent function inlining of course, but
/// on modern CPUs, you'll probably end up with the AVX2 implementation,
/// which probably can't be inlined anyway---unless you've compiled your
/// entire program with AVX2 enabled. However, even then, the various memchr
/// implementations aren't exactly small, so inlining might not help anyway!
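///
/// The shape of the trick, shown as a rough sketch with illustrative names
/// (`memchr_avx2` and `memchr_sse2` are stand-ins, not this module's actual
/// routines; the `unsafe_ifunc!` macro below is the real implementation):
///
/// ```ignore
/// use std::mem;
/// use std::sync::atomic::{AtomicPtr, Ordering};
///
/// type FnTy = fn(u8, &[u8]) -> Option<usize>;
/// type FnRaw = *mut ();
///
/// // Starts out pointing at `detect`; the first call replaces it with the
/// // chosen routine. A race is benign, since every thread stores the same
/// // value.
/// static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
///
/// fn detect(n1: u8, haystack: &[u8]) -> Option<usize> {
///     let fun = if is_x86_feature_detected!("avx2") {
///         memchr_avx2 as FnRaw
///     } else {
///         memchr_sse2 as FnRaw
///     };
///     FN.store(fun, Ordering::Relaxed);
///     // SAFETY: `fun` was derived from a function with this signature.
///     unsafe { mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack) }
/// }
///
/// fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
///     // After the first call, this loads the chosen routine directly.
///     let fun = FN.load(Ordering::Relaxed);
///     // SAFETY: FN only ever holds pointers derived from FnTy functions.
///     unsafe { mem::transmute::<FnRaw, FnTy>(fun)(n1, haystack) }
/// }
/// ```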
///
/// # Safety
///
/// Callers must ensure that `$fnty` is a function pointer type.
#[cfg(feature = "std")]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        use std::{mem, sync::atomic::{AtomicPtr, Ordering}};

        type FnRaw = *mut ();

        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
            let fun =
                if cfg!(memchr_runtime_avx)
                    && is_x86_feature_detected!("avx2")
                {
                    avx::$name as FnRaw
                } else if cfg!(memchr_runtime_sse2) {
                    sse2::$name as FnRaw
                } else {
                    fallback::$name as FnRaw
                };
            FN.store(fun as FnRaw, Ordering::Relaxed);
            // SAFETY: By virtue of the caller contract, $fnty is a function
            // pointer, which is always safe to transmute with a *mut ().
            // Also, if `fun` is the AVX routine, then it is guaranteed to
            // be supported since we checked the avx2 feature.
            unsafe {
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
            }
        }

        // SAFETY: By virtue of the caller contract, $fnty is a function
        // pointer, which is always safe to transmute with a *mut (). Also,
        // if `fun` is the AVX routine, then it is guaranteed to be
        // supported since we checked the avx2 feature.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
        }
    }}
}
/// When std isn't available to provide runtime CPU feature detection, or if
/// runtime CPU feature detection has been explicitly disabled, then just
/// call our optimized SSE2 routine directly. SSE2 is available on all x86_64
/// targets, so no CPU feature detection is necessary.
///
/// # Safety
///
/// There are no safety requirements for this definition of the macro. It is
/// safe for all inputs since it is restricted to either the fallback routine
/// or the SSE2 routine, both of which are always safe to call on x86_64.
#[cfg(not(feature = "std"))]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        if cfg!(memchr_runtime_sse2) {
            unsafe { sse2::$name($($needle),+, $haystack) }
        } else {
            fallback::$name($($needle),+, $haystack)
        }
    }}
}
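/// Returns the index of the first occurrence of `n1` in `haystack`, or
/// `None` if it does not occur.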
#[inline(always)]
pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
}
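/// Returns the index of the first occurrence of either `n1` or `n2` in
/// `haystack`.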
#[inline(always)]
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(
        fn(u8, u8, &[u8]) -> Option<usize>,
        memchr2,
        haystack,
        n1,
        n2
    )
}
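/// Returns the index of the first occurrence of `n1`, `n2` or `n3` in
/// `haystack`.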
#[inline(always)]
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(
        fn(u8, u8, u8, &[u8]) -> Option<usize>,
        memchr3,
        haystack,
        n1,
        n2,
        n3
    )
}
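/// Returns the index of the last occurrence of `n1` in `haystack`, or
/// `None` if it does not occur.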
#[inline(always)]
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
}
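/// Returns the index of the last occurrence of either `n1` or `n2` in
/// `haystack`.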
#[inline(always)]
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(
        fn(u8, u8, &[u8]) -> Option<usize>,
        memrchr2,
        haystack,
        n1,
        n2
    )
}
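/// Returns the index of the last occurrence of `n1`, `n2` or `n3` in
/// `haystack`.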
#[inline(always)]
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    unsafe_ifunc!(
        fn(u8, u8, u8, &[u8]) -> Option<usize>,
        memrchr3,
        haystack,
        n1,
        n2,
        n3
    )
}
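// A minimal smoke test for the wrappers above: whichever routine dispatch
// selects, all six should agree on these easy inputs.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn dispatch_smoke_test() {
        // indices:      012345678
        let haystack = b"abcxyzabc";
        assert_eq!(memchr(b'x', haystack), Some(3));
        assert_eq!(memchr(b'q', haystack), None);
        assert_eq!(memchr2(b'y', b'b', haystack), Some(1));
        assert_eq!(memchr3(b'q', b'z', b'c', haystack), Some(2));
        assert_eq!(memrchr(b'a', haystack), Some(6));
        assert_eq!(memrchr2(b'a', b'b', haystack), Some(7));
        assert_eq!(memrchr3(b'q', b'a', b'b', haystack), Some(7));
    }
}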