1use std::cmp::Ordering;
3use std::str::Chars;
4use unicode_bidi::{bidi_class, BidiClass};
5use unicode_properties::{GeneralCategoryGroup, UnicodeGeneralCategory};
6
7use super::rfc3454;
8
9pub fn unassigned_code_point(c: char) -> bool {
11 rfc3454::A_1
12 .binary_search_by(|&(start, end)| {
13 if start > c {
14 Ordering::Greater
15 } else if end < c {
16 Ordering::Less
17 } else {
18 Ordering::Equal
19 }
20 })
21 .is_ok()
22}
23
/// Reports whether `c` is one of the code points this module treats as
/// "commonly mapped to nothing".
///
/// The set is U+00AD, U+034F, U+1806, U+180B..=U+180D, U+200B..=U+200D,
/// U+2060, U+FE00..=U+FE0F and U+FEFF.
pub fn commonly_mapped_to_nothing(c: char) -> bool {
    match c {
        '\u{00AD}' | '\u{034F}' | '\u{1806}' | '\u{2060}' | '\u{FEFF}' => true,
        '\u{180B}'..='\u{180D}' => true,
        '\u{200B}'..='\u{200D}' => true,
        '\u{FE00}'..='\u{FE0F}' => true,
        _ => false,
    }
}
57
58pub fn case_fold_for_nfkc(c: char) -> CaseFoldForNfkc {
60 let inner = match rfc3454::B_2.binary_search_by_key(&c, |e| e.0) {
61 Ok(idx) => FoldInner::Chars(rfc3454::B_2[idx].1.chars()),
62 Err(_) => FoldInner::Char(Some(c)),
63 };
64 CaseFoldForNfkc(inner)
65}
66
/// Backing state for [`CaseFoldForNfkc`].
enum FoldInner {
    /// Remaining characters of a static replacement string.
    Chars(Chars<'static>),
    /// A single character still to be yielded, or `None` once it has been taken.
    Char(Option<char>),
}

/// The iterator returned by `case_fold_for_nfkc`.
pub struct CaseFoldForNfkc(FoldInner);

impl Iterator for CaseFoldForNfkc {
    type Item = char;

    /// Yields the next folded character, or `None` when the mapping is exhausted.
    fn next(&mut self) -> Option<char> {
        match &mut self.0 {
            FoldInner::Chars(remaining) => remaining.next(),
            // `take` hands out the character once and leaves `None` behind.
            FoldInner::Char(slot) => slot.take(),
        }
    }
}
85
/// Reports whether `c` is the ASCII space character, U+0020.
pub fn ascii_space_character(c: char) -> bool {
    matches!(c, '\u{0020}')
}
90
/// Reports whether `c` is one of the non-ASCII space code points recognised by
/// this module.
///
/// The set is U+00A0, U+1680, U+2000..=U+200B, U+202F, U+205F and U+3000.
pub fn non_ascii_space_character(c: char) -> bool {
    match c {
        '\u{2000}'..='\u{200B}' => true,
        '\u{00A0}' | '\u{1680}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
        _ => false,
    }
}
114
/// Reports whether `c` is an ASCII control character: U+0000..=U+001F or U+007F.
pub fn ascii_control_character(c: char) -> bool {
    match c {
        '\u{0000}'..='\u{001F}' => true,
        '\u{007F}' => true,
        _ => false,
    }
}
119
/// Reports whether `c` is one of the non-ASCII control code points recognised
/// by this module.
///
/// The set is U+0080..=U+009F, U+06DD, U+070F, U+180E, U+200C..=U+200D,
/// U+2028..=U+2029, U+2060..=U+2063, U+206A..=U+206F, U+FEFF,
/// U+FFF9..=U+FFFC and U+1D173..=U+1D17A.
pub fn non_ascii_control_character(c: char) -> bool {
    match c {
        // Isolated code points.
        '\u{06DD}' | '\u{070F}' | '\u{180E}' | '\u{FEFF}' => true,
        // Contiguous runs.
        '\u{0080}'..='\u{009F}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2028}'..='\u{2029}'
        | '\u{2060}'..='\u{2063}'
        | '\u{206A}'..='\u{206F}'
        | '\u{FFF9}'..='\u{FFFC}'
        | '\u{1D173}'..='\u{1D17A}' => true,
        _ => false,
    }
}
139
/// Reports whether `c` lies in one of three private-use ranges:
/// U+E000..=U+F8FF, U+F0000..=U+FFFFD or U+100000..=U+10FFFD.
pub fn private_use(c: char) -> bool {
    match c {
        // Basic Multilingual Plane range.
        '\u{E000}'..='\u{F8FF}' => true,
        // Plane 15, minus its last two code points.
        '\u{F0000}'..='\u{FFFFD}' => true,
        // Plane 16, minus its last two code points.
        '\u{100000}'..='\u{10FFFD}' => true,
        _ => false,
    }
}
144
/// Reports whether `c` is a non-character code point: U+FDD0..=U+FDEF, or the
/// last two code points (`xFFFE` / `xFFFF`) of any of the 17 planes.
pub fn non_character_code_point(c: char) -> bool {
    let cp = u32::from(c);
    // A code point ends its plane when its low 16 bits are 0xFFFE or 0xFFFF;
    // masking off the lowest bit matches both at once. `char` tops out at
    // U+10FFFF, so this covers exactly planes 0 through 16.
    let ends_plane = (cp & 0xFFFE) == 0xFFFE;
    let in_bmp_block = (0xFDD0..=0xFDEF).contains(&cp);
    ends_plane || in_bmp_block
}
166
/// Reports whether `c` is a surrogate code point.
///
/// Always returns `false`: a Rust `char` is a Unicode scalar value and can
/// never hold a surrogate (U+D800..=U+DFFF).
pub fn surrogate_code(c: char) -> bool {
    // The argument is accepted only so this predicate has the same shape as
    // its siblings; there is nothing to inspect.
    let _ = c;
    false
}
176
/// Reports whether `c` is in U+FFF9..=U+FFFD, the code points this module
/// treats as inappropriate for plain text.
pub fn inappropriate_for_plain_text(c: char) -> bool {
    match c {
        '\u{FFF9}'..='\u{FFFD}' => true,
        _ => false,
    }
}
184
/// Reports whether `c` is in U+2FF0..=U+2FFB, the code points this module
/// treats as inappropriate for canonical representation.
pub fn inappropriate_for_canonical_representation(c: char) -> bool {
    let rejected = '\u{2FF0}'..='\u{2FFB}';
    rejected.contains(&c)
}
189
/// Reports whether `c` is one of the code points this module classifies as
/// changing display properties or being deprecated.
///
/// The set is U+0340..=U+0341, U+200E..=U+200F, U+202A..=U+202E and
/// U+206A..=U+206F.
pub fn change_display_properties_or_deprecated(c: char) -> bool {
    match c {
        '\u{0340}'..='\u{0341}' => true,
        '\u{200E}'..='\u{200F}' => true,
        '\u{202A}'..='\u{202E}' => true,
        '\u{206A}'..='\u{206F}' => true,
        _ => false,
    }
}
211
/// Reports whether `c` is a tagging character: U+E0001 or U+E0020..=U+E007F.
pub fn tagging_character(c: char) -> bool {
    c == '\u{E0001}' || ('\u{E0020}'..='\u{E007F}').contains(&c)
}
216
217pub fn bidi_r_or_al(c: char) -> bool {
219 matches!(bidi_class(c), BidiClass::R | BidiClass::AL)
220}
221
222pub fn bidi_l(c: char) -> bool {
224 matches!(bidi_class(c), BidiClass::L)
225}
226
/// Reports whether `c` should be removed ("mapped to nothing") by the X.520
/// mapping this function is named after.
///
/// A fixed set of code points is always removed: U+00AD, U+034F, U+1806,
/// U+180B..=U+180D, U+200B, U+FE00..=U+FE0F and U+FFFC. Beyond that, any
/// character for which `char::is_control` is true is removed, except
/// U+0009..=U+000D and U+0085, which `x520_mapped_to_space` handles instead.
pub fn x520_mapped_to_nothing(c: char) -> bool {
    let always_removed = matches!(
        c,
        '\u{00AD}'
            | '\u{034F}'
            | '\u{1806}'
            | '\u{180B}'..='\u{180D}'
            | '\u{200B}'
            | '\u{FE00}'..='\u{FE0F}'
            | '\u{FFFC}'
    );
    if always_removed {
        return true;
    }

    // These are control characters too, but they are mapped to a space rather
    // than dropped, so they must not be reported here.
    let becomes_space = matches!(c, '\u{09}'..='\u{0D}' | '\u{85}');
    !becomes_space && c.is_control()
}
243
244pub fn x520_mapped_to_space(c: char) -> bool {
247 match c {
248 '\u{09}' | '\u{0A}'..='\u{0D}' | '\u{85}' => true,
249 _ => c.general_category_group() == GeneralCategoryGroup::Separator,
250 }
251}