#![warn(missing_docs)]
extern crate unicode_bidi;
extern crate unicode_normalization;
extern crate unicode_properties;
use std::borrow::Cow;
use std::fmt;
use unicode_normalization::UnicodeNormalization;
use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};
mod rfc3454;
pub mod tables;
/// The specific reason a string was rejected during preparation.
#[derive(Debug)]
enum ErrorCause {
    /// The prepared text contained the given character, which is either
    /// prohibited by the profile or an unassigned code point.
    ProhibitedCharacter(char),
    /// The prepared text failed the bidirectional-text check.
    ProhibitedBidirectionalText,
    /// The text began with a combining character.
    StartsWithCombiningCharacter,
    /// The text was empty.
    EmptyString,
}

/// An error returned when a string cannot be prepared.
///
/// The cause is intentionally opaque; use the `Display` implementation to
/// obtain a human-readable description.
#[derive(Debug)]
pub struct Error(ErrorCause);

impl fmt::Display for Error {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // Only one variant carries data; every other variant maps to a fixed message.
        let fixed_message = match &self.0 {
            ErrorCause::ProhibitedCharacter(c) => {
                return write!(fmt, "prohibited character `{}`", c);
            }
            ErrorCause::ProhibitedBidirectionalText => "prohibited bidirectional text",
            ErrorCause::StartsWithCombiningCharacter => "starts with combining character",
            ErrorCause::EmptyString => "empty string",
        };
        fmt.write_str(fixed_message)
    }
}

impl std::error::Error for Error {}
pub fn saslprep(s: &str) -> Result<Cow<'_, str>, Error> {
if s.chars()
.all(|c| c.is_ascii() && !tables::ascii_control_character(c))
{
return Ok(Cow::Borrowed(s));
}
let mapped = s
.chars()
.map(|c| {
if tables::non_ascii_space_character(c) {
' '
} else {
c
}
})
.filter(|&c| !tables::commonly_mapped_to_nothing(c));
let normalized = mapped.nfkc().collect::<String>();
let prohibited = normalized.chars().find(|&c| {
tables::non_ascii_space_character(c) ||
tables::ascii_control_character(c) ||
tables::non_ascii_control_character(c) ||
tables::private_use(c) ||
tables::non_character_code_point(c) ||
tables::surrogate_code(c) ||
tables::inappropriate_for_plain_text(c) ||
tables::inappropriate_for_canonical_representation(c) ||
tables::change_display_properties_or_deprecated(c) ||
tables::tagging_character(c) });
if let Some(c) = prohibited {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
if is_prohibited_bidirectional_text(&normalized) {
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
}
let unassigned = normalized
.chars()
.find(|&c| tables::unassigned_code_point(c));
if let Some(c) = unassigned {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
Ok(Cow::Owned(normalized))
}
/// Reports whether `s` violates the bidirectional-text restrictions.
///
/// The restrictions only apply when `s` contains at least one character for
/// which `tables::bidi_r_or_al` holds. In that case `s` must not contain any
/// character for which `tables::bidi_l` holds, and both its first and its
/// last character must satisfy `tables::bidi_r_or_al`.
fn is_prohibited_bidirectional_text(s: &str) -> bool {
    // Without any R/AL character there is nothing to enforce.
    if !s.contains(tables::bidi_r_or_al) {
        return false;
    }

    // R/AL text must not be mixed with L characters.
    if s.contains(tables::bidi_l) {
        return true;
    }

    // `s` is non-empty here, so these test exactly its first and last character.
    let begins_with_r_or_al = s.starts_with(tables::bidi_r_or_al);
    let ends_with_r_or_al = s.ends_with(tables::bidi_r_or_al);
    !(begins_with_r_or_al && ends_with_r_or_al)
}
pub fn nameprep(s: &str) -> Result<Cow<'_, str>, Error> {
if s.chars()
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.' || c == '-')
{
return Ok(Cow::Borrowed(s));
}
let mapped = s
.chars()
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
.flat_map(tables::case_fold_for_nfkc);
let normalized = mapped.nfkc().collect::<String>();
let prohibited = normalized.chars().find(|&c| {
tables::non_ascii_space_character(c) ||
tables::non_ascii_control_character(c) ||
tables::private_use(c) ||
tables::non_character_code_point(c) ||
tables::surrogate_code(c) ||
tables::inappropriate_for_plain_text(c) ||
tables::inappropriate_for_canonical_representation(c) ||
tables::change_display_properties_or_deprecated(c) ||
tables::tagging_character(c) });
if let Some(c) = prohibited {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
if is_prohibited_bidirectional_text(&normalized) {
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
}
let unassigned = normalized
.chars()
.find(|&c| tables::unassigned_code_point(c));
if let Some(c) = unassigned {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
Ok(Cow::Owned(normalized))
}
pub fn nodeprep(s: &str) -> Result<Cow<'_, str>, Error> {
if s.chars()
.all(|c| matches!(c, '['..='~' | '0'..='9' | '('..='.' | '#'..='%'))
{
return Ok(Cow::Borrowed(s));
}
let mapped = s
.chars()
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
.flat_map(tables::case_fold_for_nfkc);
let normalized = mapped.nfkc().collect::<String>();
let prohibited = normalized.chars().find(|&c| {
tables::ascii_space_character(c) ||
tables::non_ascii_space_character(c) ||
tables::ascii_control_character(c) ||
tables::non_ascii_control_character(c) ||
tables::private_use(c) ||
tables::non_character_code_point(c) ||
tables::surrogate_code(c) ||
tables::inappropriate_for_plain_text(c) ||
tables::inappropriate_for_canonical_representation(c) ||
tables::change_display_properties_or_deprecated(c) ||
tables::tagging_character(c) ||
prohibited_node_character(c)
});
if let Some(c) = prohibited {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
if is_prohibited_bidirectional_text(&normalized) {
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
}
let unassigned = normalized
.chars()
.find(|&c| tables::unassigned_code_point(c));
if let Some(c) = unassigned {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
Ok(Cow::Owned(normalized))
}
/// Reports whether `c` is one of the additional ASCII characters that
/// `nodeprep` rejects: `"`, `&`, `'`, `/`, `:`, `<`, `>` or `@`.
fn prohibited_node_character(c: char) -> bool {
    const PROHIBITED: [char; 8] = ['"', '&', '\'', '/', ':', '<', '>', '@'];
    PROHIBITED.contains(&c)
}
pub fn resourceprep(s: &str) -> Result<Cow<'_, str>, Error> {
if s.chars().all(|c| matches!(c, ' '..='~')) {
return Ok(Cow::Borrowed(s));
}
let mapped = s
.chars()
.filter(|&c| !tables::commonly_mapped_to_nothing(c))
.collect::<String>();
let normalized = mapped.nfkc().collect::<String>();
let prohibited = normalized.chars().find(|&c| {
tables::non_ascii_space_character(c) ||
tables::ascii_control_character(c) ||
tables::non_ascii_control_character(c) ||
tables::private_use(c) ||
tables::non_character_code_point(c) ||
tables::surrogate_code(c) ||
tables::inappropriate_for_plain_text(c) ||
tables::inappropriate_for_canonical_representation(c) ||
tables::change_display_properties_or_deprecated(c) ||
tables::tagging_character(c) });
if let Some(c) = prohibited {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
if is_prohibited_bidirectional_text(&normalized) {
return Err(Error(ErrorCause::ProhibitedBidirectionalText));
}
let unassigned = normalized
.chars()
.find(|&c| tables::unassigned_code_point(c));
if let Some(c) = unassigned {
return Err(Error(ErrorCause::ProhibitedCharacter(c)));
}
Ok(Cow::Owned(normalized))
}
/// Prepares a string following the string preparation procedure of ITU-T
/// Recommendation X.520 (the same Map / Normalize / Prohibit steps as the
/// LDAP string preparation in RFC 4518).
///
/// When `case_fold` is `true` the text is additionally case-folded during
/// mapping. No bidirectional check is performed, and leading, trailing or
/// repeated spaces are not removed; that is left to the caller.
///
/// Input made up solely of characters in `' '..='~'` (and, when `case_fold`
/// is set, solely of ASCII lowercase letters) is returned borrowed, without
/// allocating.
///
/// # Errors
///
/// Returns an [`Error`] if
/// * `s` is empty,
/// * the prepared text contains an unassigned, private-use, non-character or
///   surrogate code point, or U+FFFD REPLACEMENT CHARACTER, or
/// * the prepared text starts with a combining character.
pub fn x520prep(s: &str, case_fold: bool) -> Result<Cow<'_, str>, Error> {
    if s.is_empty() {
        return Err(Error(ErrorCause::EmptyString));
    }

    // Fast path: such input is unaffected by mapping, folding and NFKC, and
    // cannot start with a combining character.
    if s.chars()
        .all(|c| matches!(c, ' '..='~') && (!case_fold || c.is_ascii_lowercase()))
    {
        return Ok(Cow::Borrowed(s));
    }

    // Transcoding is a no-op: `&str` is already Unicode.

    // Map: drop the characters mapped to nothing, then turn the characters
    // mapped to space into U+0020.
    let mapped = s
        .chars()
        .filter(|&c| !tables::x520_mapped_to_nothing(c))
        .map(|c| {
            if tables::x520_mapped_to_space(c) {
                ' '
            } else {
                c
            }
        });

    // Normalize: NFKC is applied in both modes. The case-folding table is the
    // one meant to be followed by NFKC, so folding alone would leave e.g.
    // decomposed and precomposed spellings of the same text unequal.
    let normalized = if case_fold {
        mapped
            .flat_map(tables::case_fold_for_nfkc)
            .nfkc()
            .collect::<String>()
    } else {
        mapped.nfkc().collect::<String>()
    };

    // Prohibit.
    let prohibited = normalized.chars().find(|&c| {
        tables::unassigned_code_point(c)
            || tables::private_use(c)
            || tables::non_character_code_point(c)
            || tables::surrogate_code(c)
            || c == '\u{FFFD}' // REPLACEMENT CHARACTER
    });
    if let Some(c) = prohibited {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    // The first code point of the *prepared* string must not be a combining
    // character. Checking the raw input instead would miss a mark that becomes
    // first once a leading mapped-to-nothing character is removed, and would
    // reject input whose leading mark is itself removed by the mapping.
    // An output that ended up empty is passed through unchanged.
    if let Some(first) = normalized.chars().next() {
        if first.general_category_group() == GeneralCategoryGroup::Mark {
            return Err(Error(ErrorCause::StartsWithCombiningCharacter));
        }
    }

    Ok(normalized.into())
}
#[cfg(test)]
mod test {
    use super::*;

    /// Panics unless `result` is an error caused by a prohibited character.
    fn assert_prohibited_character<T>(result: Result<T, Error>) {
        match result {
            Err(Error(ErrorCause::ProhibitedCharacter(_))) => (),
            Err(other) => panic!("expected a prohibited-character error, got {:?}", other),
            Ok(_) => panic!("expected a prohibited-character error, got Ok"),
        }
    }

    /// Panics unless `result` is an error caused by a leading combining character.
    fn assert_starts_with_combining_char<T>(result: Result<T, Error>) {
        match result {
            Err(Error(ErrorCause::StartsWithCombiningCharacter)) => (),
            Err(other) => panic!("expected a leading-combining-character error, got {:?}", other),
            Ok(_) => panic!("expected a leading-combining-character error, got Ok"),
        }
    }

    #[test]
    fn saslprep_examples() {
        // An ASCII control character is prohibited output.
        assert_prohibited_character(saslprep("\u{0007}"));
    }

    #[test]
    fn nodeprep_examples() {
        assert_prohibited_character(nodeprep(" "));
        assert_prohibited_character(nodeprep("\u{00a0}"));
        assert_prohibited_character(nodeprep("foo@bar"));
    }

    #[test]
    fn resourceprep_examples() {
        assert_eq!("foo@bar", resourceprep("foo@bar").unwrap());
    }

    #[test]
    fn x520prep_examples() {
        assert_eq!(x520prep("foo@bar", true).unwrap(), "foo@bar");
        // U+FE00 is dropped, while U+0009 and U+000B are replaced by a SPACE
        // (not dropped). Each of them sits next to a literal space in the
        // input, so the expected output has two consecutive spaces there.
        assert_eq!(
            x520prep("J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re", false).unwrap(),
            "J.  W.  wuz h\u{0115}re"
        );
        assert_eq!(
            x520prep("J.\u{FE00} \u{9}W. \u{B}wuz h\u{0115}re", true).unwrap(),
            "j.  w.  wuz h\u{0115}re"
        );
        assert_eq!(x520prep("UPPERCASED", true).unwrap(), "uppercased");
        assert_starts_with_combining_char(x520prep("\u{0306}hello", true));
    }

    #[test]
    fn ascii_optimisations() {
        // Already-prepared ASCII input must come back borrowed, not reallocated.
        if let Cow::Owned(_) = nodeprep("nodepart").unwrap() {
            panic!("“nodepart” should get optimised as ASCII");
        }
        if let Cow::Owned(_) = nameprep("domainpart.example").unwrap() {
            panic!("“domainpart.example” should get optimised as ASCII");
        }
        if let Cow::Owned(_) = resourceprep("resourcepart").unwrap() {
            panic!("“resourcepart” should get optimised as ASCII");
        }
    }
}