phf_shared/
lib.rs

1//! See [the `phf` crate's documentation][phf] for details.
2//!
3//! [phf]: https://docs.rs/phf
4
5#![doc(html_root_url = "https://docs.rs/phf_shared/0.11")]
6#![cfg_attr(not(feature = "std"), no_std)]
7
8#[cfg(feature = "std")]
9extern crate std as core;
10
11use core::fmt;
12use core::hash::{Hash, Hasher};
13use core::num::Wrapping;
14use siphasher::sip128::{Hash128, Hasher128, SipHasher13};
15
16#[non_exhaustive]
17pub struct Hashes {
18    pub g: u32,
19    pub f1: u32,
20    pub f2: u32,
21}
22
23/// A central typedef for hash keys
24///
25/// Makes experimentation easier by only needing to be updated here.
26pub type HashKey = u64;
27
28#[inline]
29pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 {
30    (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0
31}
32
33/// `key` is from `phf_generator::HashState`.
34#[inline]
35pub fn hash<T: ?Sized + PhfHash>(x: &T, key: &HashKey) -> Hashes {
36    let mut hasher = SipHasher13::new_with_keys(0, *key);
37    x.phf_hash(&mut hasher);
38
39    let Hash128 {
40        h1: lower,
41        h2: upper,
42    } = hasher.finish128();
43
44    Hashes {
45        g: (lower >> 32) as u32,
46        f1: lower as u32,
47        f2: upper as u32,
48    }
49}
50
51/// Return an index into `phf_generator::HashState::map`.
52///
53/// * `hash` is from `hash()` in this crate.
54/// * `disps` is from `phf_generator::HashState::disps`.
55/// * `len` is the length of `phf_generator::HashState::map`.
56#[inline]
57pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 {
58    let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize];
59    displace(hashes.f1, hashes.f2, d1, d2) % (len as u32)
60}
61
62/// A trait implemented by types which can be used in PHF data structures.
63///
64/// This differs from the standard library's `Hash` trait in that `PhfHash`'s
65/// results must be architecture independent so that hashes will be consistent
66/// between the host and target when cross compiling.
67pub trait PhfHash {
68    /// Feeds the value into the state given, updating the hasher as necessary.
69    fn phf_hash<H: Hasher>(&self, state: &mut H);
70
71    /// Feeds a slice of this type into the state provided.
72    fn phf_hash_slice<H: Hasher>(data: &[Self], state: &mut H)
73    where
74        Self: Sized,
75    {
76        for piece in data {
77            piece.phf_hash(state);
78        }
79    }
80}
81
82/// Trait for printing types with `const` constructors, used by `phf_codegen` and `phf_macros`.
83pub trait FmtConst {
84    /// Print a `const` expression representing this value.
85    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
86}
87
88/// Identical to `std::borrow::Borrow` except omitting blanket impls to facilitate other
89/// borrowing patterns.
90///
91/// The same semantic requirements apply:
92///
93/// > In particular `Eq`, `Ord` and `Hash` must be equivalent for borrowed and owned values:
94/// `x.borrow() == y.borrow()` should give the same result as `x == y`.
95///
96/// (This crate's API only requires `Eq` and `PhfHash`, however.)
97///
98/// ### Motivation
99/// The conventional signature for lookup methods on collections looks something like this:
100///
101/// ```ignore
102/// impl<K, V> Map<K, V> where K: PhfHash + Eq {
103///     fn get<T: ?Sized>(&self, key: &T) -> Option<&V> where T: PhfHash + Eq, K: Borrow<T> {
104///         ...
105///     }
106/// }
107/// ```
108///
109/// This allows the key type used for lookup to be different than the key stored in the map so for
110/// example you can use `&str` to look up a value in a `Map<String, _>`. However, this runs into
111/// a problem in the case where `T` and `K` are both a `Foo<_>` type constructor but
112/// the contained type is different (even being the same type with different lifetimes).
113///
114/// The main issue for this crate's API is that, with this method signature, you cannot perform a
115/// lookup on a `Map<UniCase<&'static str>, _>` with a `UniCase<&'a str>` where `'a` is not
116/// `'static`; there is no impl of `Borrow` that resolves to
117/// `impl Borrow<UniCase<'a>> for UniCase<&'static str>` and one cannot be added either because of
118/// all the blanket impls.
119///
120/// Instead, this trait is implemented conservatively, without blanket impls, so that impls like
121/// this may be added. This is feasible since the set of types that implement `PhfHash` is
122/// intentionally small.
123///
124/// This likely won't be fixable with specialization alone but will require full support for lattice
125/// impls since we technically want to add overlapping blanket impls.
126pub trait PhfBorrow<B: ?Sized> {
127    /// Convert a reference to `self` to a reference to the borrowed type.
128    fn borrow(&self) -> &B;
129}
130
131/// Create an impl of `FmtConst` delegating to `fmt::Debug` for types that can deal with it.
132///
133/// Ideally with specialization this could be just one default impl and then specialized where
134/// it doesn't apply.
135macro_rules! delegate_debug (
136    ($ty:ty) => {
137        impl FmtConst for $ty {
138            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139                write!(f, "{:?}", self)
140            }
141        }
142    }
143);
144
145delegate_debug!(str);
146delegate_debug!(char);
147delegate_debug!(u8);
148delegate_debug!(i8);
149delegate_debug!(u16);
150delegate_debug!(i16);
151delegate_debug!(u32);
152delegate_debug!(i32);
153delegate_debug!(u64);
154delegate_debug!(i64);
155delegate_debug!(usize);
156delegate_debug!(isize);
157delegate_debug!(u128);
158delegate_debug!(i128);
159delegate_debug!(bool);
160
161/// `impl PhfBorrow<T> for T`
162macro_rules! impl_reflexive(
163    ($($t:ty),*) => (
164        $(impl PhfBorrow<$t> for $t {
165            fn borrow(&self) -> &$t {
166                self
167            }
168        })*
169    )
170);
171
172impl_reflexive!(
173    str,
174    char,
175    u8,
176    i8,
177    u16,
178    i16,
179    u32,
180    i32,
181    u64,
182    i64,
183    usize,
184    isize,
185    u128,
186    i128,
187    bool,
188    [u8]
189);
190
191#[cfg(feature = "std")]
192impl PhfBorrow<str> for String {
193    fn borrow(&self) -> &str {
194        self
195    }
196}
197
198#[cfg(feature = "std")]
199impl PhfBorrow<[u8]> for Vec<u8> {
200    fn borrow(&self) -> &[u8] {
201        self
202    }
203}
204
205#[cfg(feature = "std")]
206delegate_debug!(String);
207
208#[cfg(feature = "std")]
209impl PhfHash for String {
210    #[inline]
211    fn phf_hash<H: Hasher>(&self, state: &mut H) {
212        (**self).phf_hash(state)
213    }
214}
215
216#[cfg(feature = "std")]
217impl PhfHash for Vec<u8> {
218    #[inline]
219    fn phf_hash<H: Hasher>(&self, state: &mut H) {
220        (**self).phf_hash(state)
221    }
222}
223
224impl<'a, T: 'a + PhfHash + ?Sized> PhfHash for &'a T {
225    fn phf_hash<H: Hasher>(&self, state: &mut H) {
226        (*self).phf_hash(state)
227    }
228}
229
230impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T {
231    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232        (*self).fmt_const(f)
233    }
234}
235
236impl<'a> PhfBorrow<str> for &'a str {
237    fn borrow(&self) -> &str {
238        self
239    }
240}
241
242impl<'a> PhfBorrow<[u8]> for &'a [u8] {
243    fn borrow(&self) -> &[u8] {
244        self
245    }
246}
247
248impl PhfHash for str {
249    #[inline]
250    fn phf_hash<H: Hasher>(&self, state: &mut H) {
251        self.as_bytes().phf_hash(state)
252    }
253}
254
255impl PhfHash for [u8] {
256    #[inline]
257    fn phf_hash<H: Hasher>(&self, state: &mut H) {
258        state.write(self);
259    }
260}
261
262impl FmtConst for [u8] {
263    #[inline]
264    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
265        // slices need a leading reference
266        write!(f, "&{:?}", self)
267    }
268}
269
270#[cfg(feature = "unicase")]
271impl<S> PhfHash for unicase::UniCase<S>
272where
273    unicase::UniCase<S>: Hash,
274{
275    #[inline]
276    fn phf_hash<H: Hasher>(&self, state: &mut H) {
277        self.hash(state)
278    }
279}
280
281#[cfg(feature = "unicase")]
282impl<S> FmtConst for unicase::UniCase<S>
283where
284    S: AsRef<str>,
285{
286    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
287        if self.is_ascii() {
288            f.write_str("UniCase::ascii(")?;
289        } else {
290            f.write_str("UniCase::unicode(")?;
291        }
292
293        self.as_ref().fmt_const(f)?;
294        f.write_str(")")
295    }
296}
297
298#[cfg(feature = "unicase")]
299impl<'b, 'a: 'b, S: ?Sized + 'a> PhfBorrow<unicase::UniCase<&'b S>> for unicase::UniCase<&'a S> {
300    fn borrow(&self) -> &unicase::UniCase<&'b S> {
301        self
302    }
303}
304
305#[cfg(feature = "uncased")]
306impl PhfHash for uncased::UncasedStr {
307    #[inline]
308    fn phf_hash<H: Hasher>(&self, state: &mut H) {
309        self.hash(state)
310    }
311}
312
313#[cfg(feature = "uncased")]
314impl FmtConst for uncased::UncasedStr {
315    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
316        // transmute is not stable in const fns (rust-lang/rust#53605), so
317        // `UncasedStr::new` can't be a const fn itself, but we can inline the
318        // call to transmute here in the meantime.
319        f.write_str("unsafe { ::core::mem::transmute::<&'static str, &'static UncasedStr>(")?;
320        self.as_str().fmt_const(f)?;
321        f.write_str(") }")
322    }
323}
324
325#[cfg(feature = "uncased")]
326impl PhfBorrow<uncased::UncasedStr> for &uncased::UncasedStr {
327    fn borrow(&self) -> &uncased::UncasedStr {
328        self
329    }
330}
331
332macro_rules! sip_impl (
333    (le $t:ty) => (
334        impl PhfHash for $t {
335            #[inline]
336            fn phf_hash<H: Hasher>(&self, state: &mut H) {
337                self.to_le().hash(state);
338            }
339        }
340    );
341    ($t:ty) => (
342        impl PhfHash for $t {
343            #[inline]
344            fn phf_hash<H: Hasher>(&self, state: &mut H) {
345                self.hash(state);
346            }
347        }
348    )
349);
350
351sip_impl!(u8);
352sip_impl!(i8);
353sip_impl!(le u16);
354sip_impl!(le i16);
355sip_impl!(le u32);
356sip_impl!(le i32);
357sip_impl!(le u64);
358sip_impl!(le i64);
359sip_impl!(le usize);
360sip_impl!(le isize);
361sip_impl!(le u128);
362sip_impl!(le i128);
363sip_impl!(bool);
364
365impl PhfHash for char {
366    #[inline]
367    fn phf_hash<H: Hasher>(&self, state: &mut H) {
368        (*self as u32).phf_hash(state)
369    }
370}
371
372// minimize duplicated code since formatting drags in quite a bit
373fn fmt_array<T: core::fmt::Debug>(array: &[T], f: &mut fmt::Formatter<'_>) -> fmt::Result {
374    write!(f, "{:?}", array)
375}
376
377macro_rules! array_impl (
378    ($t:ty) => (
379        impl<const N: usize> PhfHash for [$t; N] {
380            #[inline]
381            fn phf_hash<H: Hasher>(&self, state: &mut H) {
382                for v in &self[..] {
383                    v.phf_hash(state);
384                }
385            }
386        }
387
388        impl<const N: usize> FmtConst for [$t; N] {
389            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390                fmt_array(self, f)
391            }
392        }
393
394        impl<const N: usize> PhfBorrow<[$t]> for [$t; N] {
395            fn borrow(&self) -> &[$t] {
396                self
397            }
398        }
399    )
400);
401
402array_impl!(u8);
403array_impl!(i8);
404array_impl!(u16);
405array_impl!(i16);
406array_impl!(u32);
407array_impl!(i32);
408array_impl!(u64);
409array_impl!(i64);
410array_impl!(usize);
411array_impl!(isize);
412array_impl!(u128);
413array_impl!(i128);
414array_impl!(bool);
415array_impl!(char);
416
417macro_rules! slice_impl (
418    ($t:ty) => {
419        impl PhfHash for [$t] {
420            #[inline]
421            fn phf_hash<H: Hasher>(&self, state: &mut H) {
422                for v in self {
423                    v.phf_hash(state);
424                }
425            }
426        }
427    };
428);
429
430slice_impl!(i8);
431slice_impl!(u16);
432slice_impl!(i16);
433slice_impl!(u32);
434slice_impl!(i32);
435slice_impl!(u64);
436slice_impl!(i64);
437slice_impl!(usize);
438slice_impl!(isize);
439slice_impl!(u128);
440slice_impl!(i128);
441slice_impl!(bool);
442slice_impl!(char);