1#![warn(missing_docs)]
5extern crate unicode_bidi;
6extern crate unicode_normalization;
7extern crate unicode_properties;
8
9use std::borrow::Cow;
10use std::fmt;
11use unicode_normalization::UnicodeNormalization;
12use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};
13
14mod rfc3454;
15pub mod tables;
16
/// The specific reason a preparation function rejected its input.
///
/// Kept private so that only the wrapping `Error` type is exposed to callers.
#[derive(Debug)]
enum ErrorCause {
    /// The contained character is not permitted in the prepared output.
    ///
    /// The preparation functions in this file also use this variant to report
    /// unassigned code points.
    ProhibitedCharacter(char),
    /// The string breaks the bidirectional text rules.
    ProhibitedBidirectionalText,
    /// The string begins with a combining character.
    StartsWithCombiningCharacter,
    /// The string is empty.
    EmptyString,
}
29
30#[derive(Debug)]
32pub struct Error(ErrorCause);
33
34impl fmt::Display for Error {
35 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
36 match self.0 {
37 ErrorCause::ProhibitedCharacter(c) => write!(fmt, "prohibited character `{}`", c),
38 ErrorCause::ProhibitedBidirectionalText => write!(fmt, "prohibited bidirectional text"),
39 ErrorCause::StartsWithCombiningCharacter => {
40 write!(fmt, "starts with combining character")
41 }
42 ErrorCause::EmptyString => write!(fmt, "empty string"),
43 }
44 }
45}
46
47impl std::error::Error for Error {}
48
49pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
55 if s.chars()
57 .all(|c| c.is_ascii() && !tables::ascii_control_character(c))
58 {
59 return Ok(Cow::Borrowed(s));
60 }
61
62 let mapped = s
64 .chars()
65 .map(|c| {
66 if tables::non_ascii_space_character(c) {
67 ' '
68 } else {
69 c
70 }
71 })
72 .filter(|&c| !tables::commonly_mapped_to_nothing(c));
73
74 let normalized = mapped.nfkc().collect::<String>();
76
77 let prohibited = normalized.chars().find(|&c| {
79 tables::non_ascii_space_character(c) ||
80 tables::ascii_control_character(c) ||
81 tables::non_ascii_control_character(c) ||
82 tables::private_use(c) ||
83 tables::non_character_code_point(c) ||
84 tables::surrogate_code(c) ||
85 tables::inappropriate_for_plain_text(c) ||
86 tables::inappropriate_for_canonical_representation(c) ||
87 tables::change_display_properties_or_deprecated(c) ||
88 tables::tagging_character(c) });
90 if let Some(c) = prohibited {
91 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
92 }
93
94 if is_prohibited_bidirectional_text(&normalized) {
96 return Err(Error(ErrorCause::ProhibitedBidirectionalText));
97 }
98
99 let unassigned = normalized
101 .chars()
102 .find(|&c| tables::unassigned_code_point(c));
103 if let Some(c) = unassigned {
104 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
105 }
106
107 Ok(Cow::Owned(normalized))
108}
109
110fn is_prohibited_bidirectional_text(s: &str) -> bool {
112 if s.contains(tables::bidi_r_or_al) {
113 if s.contains(tables::bidi_l) {
116 return true;
117 }
118
119 if !tables::bidi_r_or_al(s.chars().next().unwrap())
123 || !tables::bidi_r_or_al(s.chars().next_back().unwrap())
124 {
125 return true;
126 }
127 }
128
129 false
130}
131
132pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
138 if s.chars()
140 .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.' || c == '-')
141 {
142 return Ok(Cow::Borrowed(s));
143 }
144
145 let mapped = s
147 .chars()
148 .filter(|&c| !tables::commonly_mapped_to_nothing(c))
149 .flat_map(tables::case_fold_for_nfkc);
150
151 let normalized = mapped.nfkc().collect::<String>();
153
154 let prohibited = normalized.chars().find(|&c| {
156 tables::non_ascii_space_character(c) ||
157 tables::non_ascii_control_character(c) ||
158 tables::private_use(c) ||
159 tables::non_character_code_point(c) ||
160 tables::surrogate_code(c) ||
161 tables::inappropriate_for_plain_text(c) ||
162 tables::inappropriate_for_canonical_representation(c) ||
163 tables::change_display_properties_or_deprecated(c) ||
164 tables::tagging_character(c) });
166 if let Some(c) = prohibited {
167 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
168 }
169
170 if is_prohibited_bidirectional_text(&normalized) {
172 return Err(Error(ErrorCause::ProhibitedBidirectionalText));
173 }
174
175 let unassigned = normalized
177 .chars()
178 .find(|&c| tables::unassigned_code_point(c));
179 if let Some(c) = unassigned {
180 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
181 }
182
183 Ok(Cow::Owned(normalized))
184}
185
186pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
192 if s.chars()
194 .all(|c| matches!(c, '['..='~' | '0'..='9' | '('..='.' | '#'..='%'))
195 {
196 return Ok(Cow::Borrowed(s));
197 }
198
199 let mapped = s
201 .chars()
202 .filter(|&c| !tables::commonly_mapped_to_nothing(c))
203 .flat_map(tables::case_fold_for_nfkc);
204
205 let normalized = mapped.nfkc().collect::<String>();
207
208 let prohibited = normalized.chars().find(|&c| {
210 tables::ascii_space_character(c) ||
211 tables::non_ascii_space_character(c) ||
212 tables::ascii_control_character(c) ||
213 tables::non_ascii_control_character(c) ||
214 tables::private_use(c) ||
215 tables::non_character_code_point(c) ||
216 tables::surrogate_code(c) ||
217 tables::inappropriate_for_plain_text(c) ||
218 tables::inappropriate_for_canonical_representation(c) ||
219 tables::change_display_properties_or_deprecated(c) ||
220 tables::tagging_character(c) ||
221 prohibited_node_character(c)
222 });
223 if let Some(c) = prohibited {
224 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
225 }
226
227 if is_prohibited_bidirectional_text(&normalized) {
229 return Err(Error(ErrorCause::ProhibitedBidirectionalText));
230 }
231
232 let unassigned = normalized
233 .chars()
234 .find(|&c| tables::unassigned_code_point(c));
235 if let Some(c) = unassigned {
236 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
237 }
238
239 Ok(Cow::Owned(normalized))
240}
241
/// Returns `true` for the ASCII characters that `nodeprep` rejects in
/// addition to the table-driven checks: `"`, `&`, `'`, `/`, `:`, `<`, `>`
/// and `@`.
fn prohibited_node_character(c: char) -> bool {
    const PROHIBITED: [char; 8] = ['"', '&', '\'', '/', ':', '<', '>', '@'];
    PROHIBITED.contains(&c)
}
246
247pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
253 if s.chars().all(|c| matches!(c, ' '..='~')) {
255 return Ok(Cow::Borrowed(s));
256 }
257
258 let mapped = s
260 .chars()
261 .filter(|&c| !tables::commonly_mapped_to_nothing(c))
262 .collect::<String>();
263
264 let normalized = mapped.nfkc().collect::<String>();
266
267 let prohibited = normalized.chars().find(|&c| {
269 tables::non_ascii_space_character(c) ||
270 tables::ascii_control_character(c) ||
271 tables::non_ascii_control_character(c) ||
272 tables::private_use(c) ||
273 tables::non_character_code_point(c) ||
274 tables::surrogate_code(c) ||
275 tables::inappropriate_for_plain_text(c) ||
276 tables::inappropriate_for_canonical_representation(c) ||
277 tables::change_display_properties_or_deprecated(c) ||
278 tables::tagging_character(c) });
280 if let Some(c) = prohibited {
281 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
282 }
283
284 if is_prohibited_bidirectional_text(&normalized) {
286 return Err(Error(ErrorCause::ProhibitedBidirectionalText));
287 }
288
289 let unassigned = normalized
290 .chars()
291 .find(|&c| tables::unassigned_code_point(c));
292 if let Some(c) = unassigned {
293 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
294 }
295
296 Ok(Cow::Owned(normalized))
297}
298
299pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
306 if s.is_empty() {
307 return Err(Error(ErrorCause::EmptyString));
308 }
309 if s.chars()
310 .all(|c| matches!(c, ' '..='~') && (!case_fold || c.is_ascii_lowercase()))
311 {
312 return Ok(Cow::Borrowed(s));
313 }
314
315 let mapped = s
320 .chars()
321 .filter(|&c| !tables::x520_mapped_to_nothing(c))
322 .map(|c| {
323 if tables::x520_mapped_to_space(c) {
324 ' '
325 } else {
326 c
327 }
328 });
329
330 let normalized = if case_fold {
332 mapped
333 .flat_map(tables::case_fold_for_nfkc)
334 .collect::<String>()
335 } else {
336 mapped.nfkc().collect::<String>()
337 };
338
339 let prohibited = normalized.chars().find(
341 |&c| {
342 tables::unassigned_code_point(c)
343 || tables::private_use(c)
344 || tables::non_character_code_point(c)
345 || tables::surrogate_code(c)
346 || c == '\u{FFFD}'
347 }, );
349 if let Some(c) = prohibited {
350 return Err(Error(ErrorCause::ProhibitedCharacter(c)));
351 }
352 match s.chars().next() {
355 Some(c) => {
356 if c.general_category_group() == GeneralCategoryGroup::Mark {
357 return Err(Error(ErrorCause::StartsWithCombiningCharacter));
358 }
359 }
360 None => return Err(Error(ErrorCause::EmptyString)),
361 }
362
363 Ok(normalized.into())
372}
373
#[cfg(test)]
mod test {
    use super::*;

    /// Panics unless `result` is a prohibited-character error.
    fn assert_prohibited_character<T>(result: Result<T, Error>) {
        if !matches!(result, Err(Error(ErrorCause::ProhibitedCharacter(_)))) {
            panic!();
        }
    }

    /// Panics unless `result` is a starts-with-combining-character error.
    fn assert_starts_with_combining_char<T>(result: Result<T, Error>) {
        if !matches!(
            result,
            Err(Error(ErrorCause::StartsWithCombiningCharacter))
        ) {
            panic!();
        }
    }

    #[test]
    fn saslprep_examples() {
        // An ASCII control character must be rejected.
        let bell = "\u{0007}";
        assert_prohibited_character(saslprep(bell));
    }

    #[test]
    fn nodeprep_examples() {
        // ASCII space, non-ASCII space, and a node-specific prohibited character.
        for input in &[" ", "\u{00a0}", "foo@bar"] {
            assert_prohibited_character(nodeprep(input));
        }
    }

    #[test]
    fn resourceprep_examples() {
        let prepared = resourceprep("foo@bar").unwrap();
        assert_eq!("foo@bar", prepared);
    }

    #[test]
    fn x520prep_examples() {
        let messy = "J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re";

        assert_eq!(x520prep("foo@bar", true).unwrap(), "foo@bar");
        assert_eq!(x520prep(messy, false).unwrap(), "J. W. wuz h\u{0115}re");
        assert_eq!(x520prep(messy, true).unwrap(), "j. w. wuz h\u{0115}re");
        assert_eq!(x520prep("UPPERCASED", true).unwrap(), "uppercased");
        assert_starts_with_combining_char(x520prep("\u{0306}hello", true));
    }

    #[test]
    fn ascii_optimisations() {
        // The fast paths must hand back the input without allocating.
        assert!(
            matches!(nodeprep("nodepart").unwrap(), Cow::Borrowed(_)),
            "“nodepart” should get optimised as ASCII"
        );
        assert!(
            matches!(nameprep("domainpart.example").unwrap(), Cow::Borrowed(_)),
            "“domainpart.example” should get optimised as ASCII"
        );
        assert!(
            matches!(resourceprep("resourcepart").unwrap(), Cow::Borrowed(_)),
            "“resourcepart” should get optimised as ASCII"
        );
    }
}