use unicode_bidi::{bidi_class, BidiClass};
use std::cmp::Ordering;
use std::str::Chars;
use super::rfc3454;
/// Returns `true` when `c` lies inside one of the inclusive `(start, end)`
/// ranges stored in `rfc3454::A_1` (presumably RFC 3454 table A.1,
/// "Unassigned code points in Unicode 3.2").
///
/// The lookup is a binary search, so it relies on the table being sorted by
/// code point with non-overlapping ranges.
pub fn unassigned_code_point(c: char) -> bool {
    rfc3454::A_1
        .binary_search_by(|&(lo, hi)| {
            if c < lo {
                // The probed range lies entirely above `c`.
                Ordering::Greater
            } else if c > hi {
                // The probed range lies entirely below `c`.
                Ordering::Less
            } else {
                Ordering::Equal
            }
        })
        .is_ok()
}
/// Returns `true` for the characters of RFC 3454 table B.1
/// ("Commonly mapped to nothing"): soft hyphen, various joiners and
/// zero-width characters, the variation selectors U+FE00..=U+FE0F, and the
/// byte-order mark U+FEFF.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    matches!(
        c,
        '\u{00AD}'
            | '\u{034F}'
            | '\u{1806}'
            | '\u{180B}'..='\u{180D}'
            | '\u{200B}'..='\u{200D}'
            | '\u{2060}'
            | '\u{FE00}'..='\u{FE0F}'
            | '\u{FEFF}'
    )
}
/// Returns an iterator over the replacement characters for `c`.
///
/// `c` is looked up by binary search in `rfc3454::B_2`, keyed on the first
/// field of each entry (presumably RFC 3454 table B.2, "Mapping for case-folding
/// used with NFKC"). On a hit the iterator yields the characters of the
/// entry's second field; on a miss it yields `c` itself exactly once.
pub fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
    let inner = rfc3454::B_2
        .binary_search_by_key(&c, |entry| entry.0)
        .map(|idx| FoldInner::Chars(rfc3454::B_2[idx].1.chars()))
        // No table entry: the character maps to itself.
        .unwrap_or_else(|_| FoldInner::Char(Some(c)));
    CaseFoldForNfkc(inner)
}
/// Private state behind [`CaseFoldForNfkc`].
enum FoldInner {
    /// Walks the characters of a `'static` replacement string.
    Chars(Chars<'static>),
    /// Holds one pending character; becomes `None` once it has been yielded.
    Char(Option<char>),
}

/// The iterator returned by `case_fold_for_nfkc`.
///
/// Yields `char`s; the concrete source (replacement string or single
/// character) is hidden in the private inner enum.
pub struct CaseFoldForNfkc(FoldInner);

impl Iterator for CaseFoldForNfkc {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.0 {
            FoldInner::Chars(chars) => chars.next(),
            // `take` hands out the character once and leaves `None` behind.
            FoldInner::Char(pending) => pending.take(),
        }
    }
}
/// Returns `true` only for U+0020 SPACE, the single entry of RFC 3454
/// table C.1.1 ("ASCII space characters").
pub fn ascii_space_character(c: char) -> bool {
    c == '\u{0020}'
}
/// Returns `true` for the characters of RFC 3454 table C.1.2
/// ("Non-ASCII space characters"): U+00A0, U+1680, U+2000..=U+200B, U+202F,
/// U+205F and U+3000.
pub fn non_ascii_space_character(c: char) -> bool {
    matches!(
        c,
        '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
    )
}
/// Returns `true` for the characters of RFC 3454 table C.2.1
/// ("ASCII control characters"): U+0000..=U+001F and U+007F (DELETE).
pub fn ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards.
    matches!(c, '\u{0000}'..='\u{001F}' | '\u{007F}')
}
/// Returns `true` for the characters of RFC 3454 table C.2.2
/// ("Non-ASCII control characters"): the C1 controls U+0080..=U+009F plus a
/// set of format and invisible control characters from later blocks.
pub fn non_ascii_control_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards. Adjacent single code
    // points from the original list are merged into ranges.
    matches!(
        c,
        '\u{0080}'..='\u{009F}'
            | '\u{06DD}'
            | '\u{070F}'
            | '\u{180E}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2028}'..='\u{2029}'
            | '\u{2060}'..='\u{2063}'
            | '\u{206A}'..='\u{206F}'
            | '\u{FEFF}'
            | '\u{FFF9}'..='\u{FFFC}'
            | '\u{1D173}'..='\u{1D17A}'
    )
}
/// Returns `true` for the private-use code points of RFC 3454 table C.3:
/// U+E000..=U+F8FF in the BMP, and planes 15 and 16 up to (but excluding)
/// their final two non-character code points.
pub fn private_use(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards.
    matches!(
        c,
        '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'
    )
}
/// Returns `true` for the non-character code points of RFC 3454 table C.4:
/// U+FDD0..=U+FDEF, and the last two code points (U+nFFFE, U+nFFFF) of each of
/// the 17 Unicode planes.
pub fn non_character_code_point(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards.
    if matches!(c, '\u{FDD0}'..='\u{FDEF}') {
        return true;
    }
    // A code point ends a plane when its low 16 bits are FFFE or FFFF, i.e.
    // when bits 1..=15 are all set. `char` never exceeds U+10FFFF, so this
    // covers exactly planes 0 through 16, matching the explicit per-plane list.
    ((c as u32) & 0xFFFE) == 0xFFFE
}
/// Always returns `false`.
///
/// RFC 3454 table C.5 lists the surrogate code points U+D800..=U+DFFF, but a
/// Rust `char` is a Unicode scalar value and therefore can never hold one.
pub fn surrogate_code(c: char) -> bool {
    // The argument is kept only for signature parity with the other tables.
    let _ = c;
    false
}
/// Returns `true` for the characters of RFC 3454 table C.6
/// ("Inappropriate for plain text"): U+FFF9..=U+FFFD, i.e. the interlinear
/// annotation controls, the object replacement character and the replacement
/// character.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    matches!(c, '\u{FFF9}'..='\u{FFFD}')
}
/// Returns `true` for the characters of RFC 3454 table C.7
/// ("Inappropriate for canonical representation"): the ideographic
/// description characters U+2FF0..=U+2FFB.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards.
    matches!(c, '\u{2FF0}'..='\u{2FFB}')
}
/// Returns `true` for the characters of RFC 3454 table C.8
/// ("Change display properties or are deprecated"): U+0340, U+0341, the
/// directional marks U+200E/U+200F, the embedding and override controls
/// U+202A..=U+202E, and the deprecated format characters U+206A..=U+206F.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    matches!(
        c,
        '\u{0340}'..='\u{0341}'
            | '\u{200E}'..='\u{200F}'
            | '\u{202A}'..='\u{202E}'
            | '\u{206A}'..='\u{206F}'
    )
}
/// Returns `true` for the characters of RFC 3454 table C.9
/// ("Tagging characters"): U+E0001 and U+E0020..=U+E007F.
pub fn tagging_character(c: char) -> bool {
    // `..=` replaces the deprecated `...` range-pattern syntax, which is
    // rejected outright from edition 2021 onwards.
    matches!(c, '\u{E0001}' | '\u{E0020}'..='\u{E007F}')
}
/// Returns `true` when `unicode_bidi::bidi_class` classifies `c` as `R`
/// (right-to-left) or `AL` (Arabic letter).
///
/// NOTE(review): presumably stands in for RFC 3454 table D.1; the answer
/// follows whatever Unicode data the linked `unicode_bidi` crate ships.
pub fn bidi_r_or_al(c: char) -> bool {
    matches!(bidi_class(c), BidiClass::R | BidiClass::AL)
}
/// Returns `true` when `unicode_bidi::bidi_class` classifies `c` as `L`
/// (left-to-right).
///
/// NOTE(review): presumably stands in for RFC 3454 table D.2; the answer
/// follows whatever Unicode data the linked `unicode_bidi` crate ships.
pub fn bidi_l(c: char) -> bool {
    matches!(bidi_class(c), BidiClass::L)
}