stringprep/
lib.rs

1//! An implementation of the "stringprep" algorithm defined in [RFC 3454][].
2//!
3//! [RFC 3454]: https://tools.ietf.org/html/rfc3454
4#![warn(missing_docs)]
5extern crate unicode_bidi;
6extern crate unicode_normalization;
7extern crate unicode_properties;
8
9use std::borrow::Cow;
10use std::fmt;
11use unicode_normalization::UnicodeNormalization;
12use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};
13
14mod rfc3454;
15pub mod tables;
16
/// Describes why a string failed stringprep normalization.
///
/// This enum is private; callers only ever see it wrapped inside [`Error`],
/// through that type's `Debug` and `Display` output.
#[derive(Debug)]
enum ErrorCause {
    /// Contains stringprep prohibited characters.
    ///
    /// The offending character is carried in the variant. The profile
    /// functions in this file also report unassigned code points with this
    /// variant.
    ProhibitedCharacter(char),
    /// Violates stringprep rules for bidirectional text.
    ProhibitedBidirectionalText,
    /// Starts with a combining character
    /// (only produced by `x520prep` in this file).
    StartsWithCombiningCharacter,
    /// Empty String
    /// (only produced by `x520prep` in this file).
    EmptyString,
}
29
/// An error performing the stringprep algorithm.
///
/// The underlying cause is a private type, so it cannot be matched on from
/// outside this crate; the `Display` implementation gives a short
/// human-readable description of what went wrong.
#[derive(Debug)]
pub struct Error(ErrorCause);
33
34impl fmt::Display for Error {
35    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
36        match self.0 {
37            ErrorCause::ProhibitedCharacter(c) => write!(fmt, "prohibited character `{}`", c),
38            ErrorCause::ProhibitedBidirectionalText => write!(fmt, "prohibited bidirectional text"),
39            ErrorCause::StartsWithCombiningCharacter => {
40                write!(fmt, "starts with combining character")
41            }
42            ErrorCause::EmptyString => write!(fmt, "empty string"),
43        }
44    }
45}
46
// Marker impl: every method is left at the trait's default, so `Error`
// reports no underlying `source()`.
impl std::error::Error for Error {}
48
49/// Prepares a string with the SASLprep profile of the stringprep algorithm.
50///
51/// SASLprep is defined in [RFC 4013][].
52///
53/// [RFC 4013]: https://tools.ietf.org/html/rfc4013
54pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
55    // fast path for ascii text
56    if s.chars()
57        .all(|c| c.is_ascii() && !tables::ascii_control_character(c))
58    {
59        return Ok(Cow::Borrowed(s));
60    }
61
62    // 2.1 Mapping
63    let mapped = s
64        .chars()
65        .map(|c| {
66            if tables::non_ascii_space_character(c) {
67                ' '
68            } else {
69                c
70            }
71        })
72        .filter(|&c| !tables::commonly_mapped_to_nothing(c));
73
74    // 2.2 Normalization
75    let normalized = mapped.nfkc().collect::<String>();
76
77    // 2.3 Prohibited Output
78    let prohibited = normalized.chars().find(|&c| {
79        tables::non_ascii_space_character(c) /* C.1.2 */ ||
80            tables::ascii_control_character(c) /* C.2.1 */ ||
81            tables::non_ascii_control_character(c) /* C.2.2 */ ||
82            tables::private_use(c) /* C.3 */ ||
83            tables::non_character_code_point(c) /* C.4 */ ||
84            tables::surrogate_code(c) /* C.5 */ ||
85            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
86            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
87            tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
88            tables::tagging_character(c) /* C.9 */
89    });
90    if let Some(c) = prohibited {
91        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
92    }
93
94    // 2.4. Bidirectional Characters
95    if is_prohibited_bidirectional_text(&normalized) {
96        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
97    }
98
99    // 2.5 Unassigned Code Points
100    let unassigned = normalized
101        .chars()
102        .find(|&c| tables::unassigned_code_point(c));
103    if let Some(c) = unassigned {
104        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
105    }
106
107    Ok(Cow::Owned(normalized))
108}
109
110// RFC3454, 6. Bidirectional Characters
111fn is_prohibited_bidirectional_text(s: &str) -> bool {
112    if s.contains(tables::bidi_r_or_al) {
113        // 2) If a string contains any RandALCat character, the string
114        // MUST NOT contain any LCat character.
115        if s.contains(tables::bidi_l) {
116            return true;
117        }
118
119        // 3) If a string contains any RandALCat character, a RandALCat
120        // character MUST be the first character of the string, and a
121        // RandALCat character MUST be the last character of the string.
122        if !tables::bidi_r_or_al(s.chars().next().unwrap())
123            || !tables::bidi_r_or_al(s.chars().next_back().unwrap())
124        {
125            return true;
126        }
127    }
128
129    false
130}
131
132/// Prepares a string with the Nameprep profile of the stringprep algorithm.
133///
134/// Nameprep is defined in [RFC 3491][].
135///
136/// [RFC 3491]: https://tools.ietf.org/html/rfc3491
137pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
138    // fast path for ascii text
139    if s.chars()
140        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.' || c == '-')
141    {
142        return Ok(Cow::Borrowed(s));
143    }
144
145    // 3. Mapping
146    let mapped = s
147        .chars()
148        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
149        .flat_map(tables::case_fold_for_nfkc);
150
151    // 4. Normalization
152    let normalized = mapped.nfkc().collect::<String>();
153
154    // 5. Prohibited Output
155    let prohibited = normalized.chars().find(|&c| {
156        tables::non_ascii_space_character(c) /* C.1.2 */ ||
157            tables::non_ascii_control_character(c) /* C.2.2 */ ||
158            tables::private_use(c) /* C.3 */ ||
159            tables::non_character_code_point(c) /* C.4 */ ||
160            tables::surrogate_code(c) /* C.5 */ ||
161            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
162            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
163            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
164            tables::tagging_character(c) /* C.9 */
165    });
166    if let Some(c) = prohibited {
167        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
168    }
169
170    // 6. Bidirectional Characters
171    if is_prohibited_bidirectional_text(&normalized) {
172        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
173    }
174
175    // 7 Unassigned Code Points
176    let unassigned = normalized
177        .chars()
178        .find(|&c| tables::unassigned_code_point(c));
179    if let Some(c) = unassigned {
180        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
181    }
182
183    Ok(Cow::Owned(normalized))
184}
185
186/// Prepares a string with the Nodeprep profile of the stringprep algorithm.
187///
188/// Nameprep is defined in [RFC 3920, Appendix A][].
189///
190/// [RFC 3920, Appendix A]: https://tools.ietf.org/html/rfc3920#appendix-A
191pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
192    // fast path for common ascii text
193    if s.chars()
194        .all(|c| matches!(c, '['..='~' | '0'..='9' | '('..='.' | '#'..='%'))
195    {
196        return Ok(Cow::Borrowed(s));
197    }
198
199    // A.3. Mapping
200    let mapped = s
201        .chars()
202        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
203        .flat_map(tables::case_fold_for_nfkc);
204
205    // A.4. Normalization
206    let normalized = mapped.nfkc().collect::<String>();
207
208    // A.5. Prohibited Output
209    let prohibited = normalized.chars().find(|&c| {
210        tables::ascii_space_character(c) /* C.1.1 */ ||
211            tables::non_ascii_space_character(c) /* C.1.2 */ ||
212            tables::ascii_control_character(c) /* C.2.1 */ ||
213            tables::non_ascii_control_character(c) /* C.2.2 */ ||
214            tables::private_use(c) /* C.3 */ ||
215            tables::non_character_code_point(c) /* C.4 */ ||
216            tables::surrogate_code(c) /* C.5 */ ||
217            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
218            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
219            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
220            tables::tagging_character(c) /* C.9 */ ||
221            prohibited_node_character(c)
222    });
223    if let Some(c) = prohibited {
224        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
225    }
226
227    // A.6. Bidirectional Characters
228    if is_prohibited_bidirectional_text(&normalized) {
229        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
230    }
231
232    let unassigned = normalized
233        .chars()
234        .find(|&c| tables::unassigned_code_point(c));
235    if let Some(c) = unassigned {
236        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
237    }
238
239    Ok(Cow::Owned(normalized))
240}
241
// Characters that RFC3920 forbids in JID nodes on top of the generic
// stringprep prohibitions.
fn prohibited_node_character(c: char) -> bool {
    const FORBIDDEN: [char; 8] = ['"', '&', '\'', '/', ':', '<', '>', '@'];
    FORBIDDEN.contains(&c)
}
246
247/// Prepares a string with the Resourceprep profile of the stringprep algorithm.
248///
249/// Nameprep is defined in [RFC 3920, Appendix B][].
250///
251/// [RFC 3920, Appendix B]: https://tools.ietf.org/html/rfc3920#appendix-B
252pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
253    // fast path for ascii text
254    if s.chars().all(|c| matches!(c, ' '..='~')) {
255        return Ok(Cow::Borrowed(s));
256    }
257
258    // B.3. Mapping
259    let mapped = s
260        .chars()
261        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
262        .collect::<String>();
263
264    // B.4. Normalization
265    let normalized = mapped.nfkc().collect::<String>();
266
267    // B.5. Prohibited Output
268    let prohibited = normalized.chars().find(|&c| {
269        tables::non_ascii_space_character(c) /* C.1.2 */ ||
270            tables::ascii_control_character(c) /* C.2.1 */ ||
271            tables::non_ascii_control_character(c) /* C.2.2 */ ||
272            tables::private_use(c) /* C.3 */ ||
273            tables::non_character_code_point(c) /* C.4 */ ||
274            tables::surrogate_code(c) /* C.5 */ ||
275            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
276            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
277            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
278            tables::tagging_character(c) /* C.9 */
279    });
280    if let Some(c) = prohibited {
281        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
282    }
283
284    // B.6. Bidirectional Characters
285    if is_prohibited_bidirectional_text(&normalized) {
286        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
287    }
288
289    let unassigned = normalized
290        .chars()
291        .find(|&c| tables::unassigned_code_point(c));
292    if let Some(c) = unassigned {
293        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
294    }
295
296    Ok(Cow::Owned(normalized))
297}
298
299/// Prepares a string according to the procedures described in Section 7 of
300/// [ITU-T Recommendation X.520 (2019)](https://www.itu.int/rec/T-REC-X.520-201910-I/en).
301///
302/// Note that this function does _not_ remove leading, trailing, or inner
303/// spaces as described in Section 7.6, because the characters needing removal
304/// will vary across the matching rules and ASN.1 syntaxes used.
305pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
306    if s.is_empty() {
307        return Err(Error(ErrorCause::EmptyString));
308    }
309    if s.chars()
310        .all(|c| matches!(c, ' '..='~') && (!case_fold || c.is_ascii_lowercase()))
311    {
312        return Ok(Cow::Borrowed(s));
313    }
314
315    // 1. Transcode
316    // Already done because &str is enforced to be Unicode.
317
318    // 2. Map
319    let mapped = s
320        .chars()
321        .filter(|&c| !tables::x520_mapped_to_nothing(c))
322        .map(|c| {
323            if tables::x520_mapped_to_space(c) {
324                ' '
325            } else {
326                c
327            }
328        });
329
330    // 3. Normalize
331    let normalized = if case_fold {
332        mapped
333            .flat_map(tables::case_fold_for_nfkc)
334            .collect::<String>()
335    } else {
336        mapped.nfkc().collect::<String>()
337    };
338
339    // 4. Prohibit
340    let prohibited = normalized.chars().find(
341        |&c| {
342            tables::unassigned_code_point(c)
343                || tables::private_use(c)
344                || tables::non_character_code_point(c)
345                || tables::surrogate_code(c)
346                || c == '\u{FFFD}'
347        }, // REPLACEMENT CHARACTER
348    );
349    if let Some(c) = prohibited {
350        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
351    }
352    // From ITU-T Recommendation X.520, Section 7.4:
353    // "The first code point of a string is prohibited from being a combining character."
354    match s.chars().next() {
355        Some(c) => {
356            if c.general_category_group() == GeneralCategoryGroup::Mark {
357                return Err(Error(ErrorCause::StartsWithCombiningCharacter));
358            }
359        }
360        None => return Err(Error(ErrorCause::EmptyString)),
361    }
362
363    // 5. Check bidi
364    // From ITU-T Recommendation X.520, Section 7.4:
365    // "There are no bidirectional restrictions. The output string is the input string."
366    // So there is nothing to do for this step.
367
368    // 6. Insignificant Character Removal
369    // Done in calling functions.
370
371    Ok(normalized.into())
372}
373
#[cfg(test)]
mod test {
    use super::*;

    /// Panics unless `result` is a prohibited-character error.
    fn assert_prohibited_character<T>(result: Result<T, Error>) {
        if !matches!(result, Err(Error(ErrorCause::ProhibitedCharacter(_)))) {
            panic!();
        }
    }

    /// Panics unless `result` is a starts-with-combining-character error.
    fn assert_starts_with_combining_char<T>(result: Result<T, Error>) {
        if !matches!(
            result,
            Err(Error(ErrorCause::StartsWithCombiningCharacter))
        ) {
            panic!();
        }
    }

    // RFC4013, 3. Examples
    #[test]
    fn saslprep_examples() {
        let bell = saslprep("\u{0007}");
        assert_prohibited_character(bell);
    }

    #[test]
    fn nodeprep_examples() {
        let space = nodeprep(" ");
        assert_prohibited_character(space);

        let no_break_space = nodeprep("\u{00a0}");
        assert_prohibited_character(no_break_space);

        let with_at_sign = nodeprep("foo@bar");
        assert_prohibited_character(with_at_sign);
    }

    #[test]
    fn resourceprep_examples() {
        let prepared = resourceprep("foo@bar").unwrap();
        assert_eq!("foo@bar", prepared);
    }

    #[test]
    fn x520prep_examples() {
        let mixed = "J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re";

        assert_eq!(x520prep("foo@bar", true).unwrap(), "foo@bar");
        assert_eq!(
            x520prep(mixed, false).unwrap(),
            "J.  W.  wuz h\u{0115}re"
        );
        assert_eq!(
            x520prep(mixed, true).unwrap(),
            "j.  w.  wuz h\u{0115}re"
        );
        assert_eq!(x520prep("UPPERCASED", true).unwrap(), "uppercased");
        assert_starts_with_combining_char(x520prep("\u{0306}hello", true));
    }

    #[test]
    fn ascii_optimisations() {
        match nodeprep("nodepart").unwrap() {
            Cow::Borrowed(_) => {}
            Cow::Owned(_) => panic!("“nodepart” should get optimised as ASCII"),
        }
        match nameprep("domainpart.example").unwrap() {
            Cow::Borrowed(_) => {}
            Cow::Owned(_) => panic!("“domainpart.example” should get optimised as ASCII"),
        }
        match resourceprep("resourcepart").unwrap() {
            Cow::Borrowed(_) => {}
            Cow::Owned(_) => panic!("“resourcepart” should get optimised as ASCII"),
        }
    }
}