icu_properties/props.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Wraps an `impl` block of associated constants for a newtype property
/// (an "open enum") and generates the boilerplate that needs the complete
/// list of those constants.
///
/// See [`test_enumerated_property_completeness`] for usage.
///
/// Example input:
/// ```ignore
/// create_const_array! {
///     impl EastAsianWidth {
///         pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///         pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///         ...
///     }
///     #[test]
///     fn east_asian_width_consts();
/// }
/// ```
///
/// The expansion consists of:
/// - the `impl` block exactly as written, extended with
///   `pub const ALL_VALUES: &'static [Type]` listing every declared constant
///   in declaration order;
/// - a `databake::Bake` impl (only with the `datagen` feature) that bakes a
///   declared constant as its path and any other value as a
///   `from_icu4c_value(..)` call;
/// - `impl From<Type> for u16`, which casts the first field of the struct;
/// - a `#[test]` function with the given name that compares each constant's
///   identifier with its (normalized) long property name.
macro_rules! create_const_array {
    (
        $( #[$impl_attr:meta] )*
        impl $enum_ty:ident {
            $(
                $( #[$const_attr:meta] )*
                $const_vis:vis const $const_name:ident: $const_ty:ty = $const_value:expr;
            )*
        }
        #[test]
        fn $test_name:ident();
    ) => {
        $( #[$impl_attr] )*
        impl $enum_ty {
            $(
                $( #[$const_attr] )*
                $const_vis const $const_name: $const_ty = $const_value;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $( $enum_ty::$const_name ),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    // Known values are baked by name ...
                    $(
                        Self::$const_name => {
                            databake::quote!(icu_properties::props::$enum_ty::$const_name)
                        }
                    )*
                    // ... anything else is baked from its raw value.
                    Self(raw_value) => databake::quote!(
                        icu_properties::props::$enum_ty::from_icu4c_value(#raw_value)
                    ),
                }
            }
        }

        impl From<$enum_ty> for u16 {
            fn from(prop: $enum_ty) -> Self {
                prop.0 as u16
            }
        }

        #[test]
        fn $test_name() {
            $(
                {
                    let long_name = crate::names::PropertyNamesLong::<$enum_ty>::new()
                        .get($enum_ty::$const_name)
                        .unwrap()
                        // Rust identifiers use camel case
                        .replace('_', "")
                        // We use Ethiopian
                        .replace("Ethiopic", "Ethiopian")
                        // Nastaliq is missing a long name?
                        .replace("Aran", "Nastaliq")
                        // We spell these out
                        .replace("LVSyllable", "LeadingVowelSyllable")
                        .replace("LVTSyllable", "LeadingVowelTrailingSyllable");
                    assert_eq!(long_name, stringify!($const_name));
                }
            )*
        }
    };
}
98
99pub use crate::code_point_map::EnumeratedProperty;
100
/// Implements the property traits for an enumerated property value type.
///
/// Keys must be given in this order, each terminated by `;`:
/// - `name`: the long property name, as a string literal;
/// - `short_name`: the short property name, as a string literal;
/// - `ident`: the value type to implement the traits for;
/// - `data_marker`: the type used as `EnumeratedProperty::DataMarker`;
/// - `singleton`: the identifier of the baked singleton on
///   `crate::provider::Baked`;
/// - `ule_ty` (optional): when present, a `zerovec::ule::AsULE` impl with
///   this `ULE` type is generated; it converts through the first field of
///   the value type.
///
/// A `crate::private::Sealed` impl is always generated.
macro_rules! make_enumerated_property {
    (
        name: $long_name:literal;
        short_name: $abbreviation:literal;
        ident: $prop_ty:path;
        data_marker: $marker_ty:ty;
        singleton: $baked_singleton:ident;
        $( ule_ty: $unaligned_ty:ty; )?
    ) => {
        impl crate::private::Sealed for $prop_ty {}

        impl EnumeratedProperty for $prop_ty {
            type DataMarker = $marker_ty;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbreviation.as_bytes();
            // The baked data only exists when compiled data is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked_singleton;
        }

        $(
            impl zerovec::ule::AsULE for $prop_ty {
                type ULE = $unaligned_ty;

                fn to_unaligned(self) -> Self::ULE {
                    self.0.to_unaligned()
                }

                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
135
/// Enumerated property Bidi_Class
///
/// These are the categories required by the Unicode Bidirectional Algorithm.
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// The type is a transparent wrapper around the raw `u8` value; use
/// [`BidiClass::to_icu4c_value`] and [`BidiClass::from_icu4c_value`] to
/// convert to and from that raw value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);

impl BidiClass {
    /// Returns an ICU4C `UBidiClass` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }

    /// Constructor from an ICU4C `UBidiClass` value.
    ///
    /// The value is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        BidiClass(value)
    }
}
172
// Declares the named `BidiClass` values.
//
// `create_const_array!` re-emits this `impl` block as written and additionally
// generates `BidiClass::ALL_VALUES`, `impl From<BidiClass> for u16`, a
// `databake::Bake` impl (behind the `datagen` feature), and the
// `bidi_props_consts` test, which compares each constant's identifier with
// its long property name.
create_const_array! {
#[allow(non_upper_case_globals)]
impl BidiClass {
    /// (`L`) any strong left-to-right character
    pub const LeftToRight: BidiClass = BidiClass(0);
    /// (`R`) any strong right-to-left (non-Arabic-type) character
    pub const RightToLeft: BidiClass = BidiClass(1);
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
    pub const EuropeanNumber: BidiClass = BidiClass(2);
    /// (`ES`) plus and minus signs
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
    /// (`ET`) a terminator in a numeric format context, includes currency signs
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
    /// (`AN`) any Arabic-Indic digit
    pub const ArabicNumber: BidiClass = BidiClass(5);
    /// (`CS`) commas, colons, and slashes
    pub const CommonSeparator: BidiClass = BidiClass(6);
    /// (`B`) various newline characters
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
    /// (`S`) various segment-related control codes
    pub const SegmentSeparator: BidiClass = BidiClass(8);
    /// (`WS`) spaces
    pub const WhiteSpace: BidiClass = BidiClass(9);
    /// (`ON`) most other symbols and punctuation marks
    pub const OtherNeutral: BidiClass = BidiClass(10);
    /// (`LRE`) U+202A: the LR embedding control
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
    /// (`LRO`) U+202D: the LR override control
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
    /// (`AL`) any strong right-to-left (Arabic-type) character
    pub const ArabicLetter: BidiClass = BidiClass(13);
    /// (`RLE`) U+202B: the RL embedding control
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
    /// (`RLO`) U+202E: the RL override control
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
    /// (`PDF`) U+202C: terminates an embedding or override control
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
    /// (`NSM`) any nonspacing mark
    pub const NonspacingMark: BidiClass = BidiClass(17);
    /// (`BN`) most format characters, control codes, or noncharacters
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
    /// (`FSI`) U+2068: the first strong isolate control
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
    /// (`LRI`) U+2066: the LR isolate control
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
    /// (`RLI`) U+2067: the RL isolate control
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
    /// (`PDI`) U+2069: terminates an isolate control
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
}
#[test]
fn bidi_props_consts();
}
226
// Implements `Sealed` and `EnumeratedProperty` for `BidiClass`. Because
// `ule_ty` is given, the macro also emits a `zerovec::ule::AsULE` impl with
// `ULE = u8` that converts through the wrapped `u8`.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
235
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (e.g. `UppercaseLetter`).
    /// It does not support grouped categories (e.g. `Letter`). For grouped categories, use
    /// [`GeneralCategoryGroup`](crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    // NOTE(review): the explicit discriminants below presumably mirror ICU4C's
    // `UCharCategory` values — confirm before changing any of them. They are
    // NOT always in declaration order (see the notes on the mark and
    // punctuation variants).
    // NOTE(review): some derives may depend on declaration order (e.g. a serde
    // variant index) — confirm before reordering variants.
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
    #[repr(u8)]
    pub enum GeneralCategory {
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        // Note: `Mc` is declared before `Me`, but its value (8) is larger than
        // `Me`'s (7).
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        // Note: `Pi` (28) and `Pf` (29) hold the two largest values of the
        // enum even though they are declared before `Po` (23) and the symbol
        // categories (24..=27).
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
340
341pub use gc::GeneralCategory;
342
343impl GeneralCategory {
344 /// All possible values of this enum
345 pub const ALL_VALUES: &'static [GeneralCategory] = &[
346 GeneralCategory::Unassigned,
347 GeneralCategory::UppercaseLetter,
348 GeneralCategory::LowercaseLetter,
349 GeneralCategory::TitlecaseLetter,
350 GeneralCategory::ModifierLetter,
351 GeneralCategory::OtherLetter,
352 GeneralCategory::NonspacingMark,
353 GeneralCategory::SpacingMark,
354 GeneralCategory::EnclosingMark,
355 GeneralCategory::DecimalNumber,
356 GeneralCategory::LetterNumber,
357 GeneralCategory::OtherNumber,
358 GeneralCategory::SpaceSeparator,
359 GeneralCategory::LineSeparator,
360 GeneralCategory::ParagraphSeparator,
361 GeneralCategory::Control,
362 GeneralCategory::Format,
363 GeneralCategory::PrivateUse,
364 GeneralCategory::Surrogate,
365 GeneralCategory::DashPunctuation,
366 GeneralCategory::OpenPunctuation,
367 GeneralCategory::ClosePunctuation,
368 GeneralCategory::ConnectorPunctuation,
369 GeneralCategory::InitialPunctuation,
370 GeneralCategory::FinalPunctuation,
371 GeneralCategory::OtherPunctuation,
372 GeneralCategory::MathSymbol,
373 GeneralCategory::CurrencySymbol,
374 GeneralCategory::ModifierSymbol,
375 GeneralCategory::OtherSymbol,
376 ];
377}
378
/// Checks that every `GeneralCategory` variant is spelled like its long
/// property name once the underscores are removed from the latter.
#[test]
fn gc_variants() {
    for variant in GeneralCategory::ALL_VALUES.iter().copied() {
        let long_name = crate::names::PropertyNamesLong::<GeneralCategory>::new()
            .get(variant)
            .unwrap()
            // Rust identifiers use camel case
            .replace('_', "");
        let variant_name = format!("{variant:?}");
        assert_eq!(long_name, variant_name);
    }
}
392
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
///
/// It carries no data: it only signals that the integer does not name a
/// `GeneralCategory`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
397
398impl TryFrom<u8> for GeneralCategory {
399 type Error = GeneralCategoryOutOfBoundsError;
400 /// Construct this [`GeneralCategory`] from an integer, returning
401 /// an error if it is out of bounds
402 fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
403 GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
404 }
405}
406
// Implements `Sealed` and `EnumeratedProperty` for `GeneralCategory`.
// `ule_ty` is omitted, so the macro emits no `zerovec::ule::AsULE` impl here.
// NOTE(review): the ULE type presumably comes from
// `#[zerovec::make_ule(GeneralCategoryULE)]` on the enum — confirm.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
414
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The value is a `u32` bit set: bit `n` is set when the [`GeneralCategory`]
/// whose numeric value is `n` belongs to the group. The resulting masks
/// correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);
434
// Implements the `Sealed` trait from `crate::private` for
// `GeneralCategoryGroup`; the empty body shows the trait has no required items.
impl crate::private::Sealed for GeneralCategoryGroup {}
436
437use GeneralCategory as GC;
438use GeneralCategoryGroup as GCG;
439
440#[allow(non_upper_case_globals)]
441impl GeneralCategoryGroup {
442 /// (`Lu`) An uppercase letter
443 pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
444 /// (`Ll`) A lowercase letter
445 pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
446 /// (`Lt`) A digraphic letter, with first part uppercase
447 pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
448 /// (`Lm`) A modifier letter
449 pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
450 /// (`Lo`) Other letters, including syllables and ideographs
451 pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
452 /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
453 pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
454 | (1 << (GC::LowercaseLetter as u32))
455 | (1 << (GC::TitlecaseLetter as u32)));
456 /// (`L`) The union of all letter categories
457 pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
458 | (1 << (GC::LowercaseLetter as u32))
459 | (1 << (GC::TitlecaseLetter as u32))
460 | (1 << (GC::ModifierLetter as u32))
461 | (1 << (GC::OtherLetter as u32)));
462
463 /// (`Mn`) A nonspacing combining mark (zero advance width)
464 pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
465 /// (`Mc`) A spacing combining mark (positive advance width)
466 pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
467 /// (`Me`) An enclosing combining mark
468 pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
469 /// (`M`) The union of all mark categories
470 pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
471 | (1 << (GC::EnclosingMark as u32))
472 | (1 << (GC::SpacingMark as u32)));
473
474 /// (`Nd`) A decimal digit
475 pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
476 /// (`Nl`) A letterlike numeric character
477 pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
478 /// (`No`) A numeric character of other type
479 pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
480 /// (`N`) The union of all number categories
481 pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
482 | (1 << (GC::LetterNumber as u32))
483 | (1 << (GC::OtherNumber as u32)));
484
485 /// (`Zs`) A space character (of various non-zero widths)
486 pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
487 /// (`Zl`) U+2028 LINE SEPARATOR only
488 pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
489 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
490 pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
491 /// (`Z`) The union of all separator categories
492 pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
493 | (1 << (GC::LineSeparator as u32))
494 | (1 << (GC::ParagraphSeparator as u32)));
495
496 /// (`Cc`) A C0 or C1 control code
497 pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
498 /// (`Cf`) A format control character
499 pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
500 /// (`Co`) A private-use character
501 pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
502 /// (`Cs`) A surrogate code point
503 pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
504 /// (`Cn`) A reserved unassigned code point or a noncharacter
505 pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
506 /// (`C`) The union of all control code, reserved, and unassigned categories
507 pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
508 | (1 << (GC::Format as u32))
509 | (1 << (GC::PrivateUse as u32))
510 | (1 << (GC::Surrogate as u32))
511 | (1 << (GC::Unassigned as u32)));
512
513 /// (`Pd`) A dash or hyphen punctuation mark
514 pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
515 /// (`Ps`) An opening punctuation mark (of a pair)
516 pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
517 /// (`Pe`) A closing punctuation mark (of a pair)
518 pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
519 /// (`Pc`) A connecting punctuation mark, like a tie
520 pub const ConnectorPunctuation: GeneralCategoryGroup =
521 GCG(1 << (GC::ConnectorPunctuation as u32));
522 /// (`Pi`) An initial quotation mark
523 pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
524 /// (`Pf`) A final quotation mark
525 pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
526 /// (`Po`) A punctuation mark of other type
527 pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
528 /// (`P`) The union of all punctuation categories
529 pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
530 | (1 << (GC::OpenPunctuation as u32))
531 | (1 << (GC::ClosePunctuation as u32))
532 | (1 << (GC::ConnectorPunctuation as u32))
533 | (1 << (GC::OtherPunctuation as u32))
534 | (1 << (GC::InitialPunctuation as u32))
535 | (1 << (GC::FinalPunctuation as u32)));
536
537 /// (`Sm`) A symbol of mathematical use
538 pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
539 /// (`Sc`) A currency sign
540 pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
541 /// (`Sk`) A non-letterlike modifier symbol
542 pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
543 /// (`So`) A symbol of other type
544 pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
545 /// (`S`) The union of all symbol categories
546 pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
547 | (1 << (GC::CurrencySymbol as u32))
548 | (1 << (GC::ModifierSymbol as u32))
549 | (1 << (GC::OtherSymbol as u32)));
550
551 const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
552
553 /// Return whether the code point belongs in the provided multi-value category.
554 ///
555 /// ```
556 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
557 /// use icu::properties::CodePointMapData;
558 ///
559 /// let gc = CodePointMapData::<GeneralCategory>::new();
560 ///
561 /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
562 /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
563 ///
564 /// // U+0B1E ORIYA LETTER NYA
565 /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
566 /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
567 /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
568 ///
569 /// // U+0301 COMBINING ACUTE ACCENT
570 /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
571 /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
572 /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
573 ///
574 /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
575 /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
576 /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
577 ///
578 /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
579 /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
580 /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
581 ///
582 /// // U+2713 CHECK MARK
583 /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
584 /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
585 /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
586 ///
587 /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
588 /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
589 /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
590 ///
591 /// // U+E007F CANCEL TAG
592 /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
593 /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
594 /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
595 /// ```
596 pub const fn contains(self, val: GeneralCategory) -> bool {
597 0 != (1 << (val as u32)) & self.0
598 }
599
600 /// Produce a GeneralCategoryGroup that is the inverse of this one
601 ///
602 /// # Example
603 ///
604 /// ```rust
605 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
606 ///
607 /// let letter = GeneralCategoryGroup::Letter;
608 /// let not_letter = letter.complement();
609 ///
610 /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
611 /// assert!(!letter.contains(GeneralCategory::MathSymbol));
612 /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
613 /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
614 /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
615 /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
616 /// ```
617 pub const fn complement(self) -> Self {
618 // Mask off things not in Self::ALL to guarantee the mask
619 // values stay in-range
620 GeneralCategoryGroup(!self.0 & Self::ALL)
621 }
622
623 /// Return the group representing all GeneralCategory values
624 ///
625 /// # Example
626 ///
627 /// ```rust
628 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
629 ///
630 /// let all = GeneralCategoryGroup::all();
631 ///
632 /// assert!(all.contains(GeneralCategory::MathSymbol));
633 /// assert!(all.contains(GeneralCategory::OtherPunctuation));
634 /// assert!(all.contains(GeneralCategory::UppercaseLetter));
635 /// ```
636 pub const fn all() -> Self {
637 Self(Self::ALL)
638 }
639
640 /// Return the empty group
641 ///
642 /// # Example
643 ///
644 /// ```rust
645 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
646 ///
647 /// let empty = GeneralCategoryGroup::empty();
648 ///
649 /// assert!(!empty.contains(GeneralCategory::MathSymbol));
650 /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
651 /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
652 /// ```
653 pub const fn empty() -> Self {
654 Self(0)
655 }
656
657 /// Take the union of two groups
658 ///
659 /// # Example
660 ///
661 /// ```rust
662 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
663 ///
664 /// let letter = GeneralCategoryGroup::Letter;
665 /// let symbol = GeneralCategoryGroup::Symbol;
666 /// let union = letter.union(symbol);
667 ///
668 /// assert!(union.contains(GeneralCategory::MathSymbol));
669 /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
670 /// assert!(union.contains(GeneralCategory::UppercaseLetter));
671 /// ```
672 pub const fn union(self, other: Self) -> Self {
673 Self(self.0 | other.0)
674 }
675
676 /// Take the intersection of two groups
677 ///
678 /// # Example
679 ///
680 /// ```rust
681 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
682 ///
683 /// let letter = GeneralCategoryGroup::Letter;
684 /// let lu = GeneralCategoryGroup::UppercaseLetter;
685 /// let intersection = letter.intersection(lu);
686 ///
687 /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
688 /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
689 /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
690 /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
691 /// ```
692 pub const fn intersection(self, other: Self) -> Self {
693 Self(self.0 & other.0)
694 }
695}
696
697impl From<GeneralCategory> for GeneralCategoryGroup {
698 fn from(subcategory: GeneralCategory) -> Self {
699 GeneralCategoryGroup(1 << (subcategory as u32))
700 }
701}
702impl From<u32> for GeneralCategoryGroup {
703 fn from(mask: u32) -> Self {
704 // Mask off things not in Self::ALL to guarantee the mask
705 // values stay in-range
706 GeneralCategoryGroup(mask & Self::ALL)
707 }
708}
709impl From<GeneralCategoryGroup> for u32 {
710 fn from(group: GeneralCategoryGroup) -> Self {
711 group.0
712 }
713}
714
/// Enumerated property Script.
///
/// This is used with both the Script and Script_Extensions Unicode properties.
/// Each character is assigned a single Script, but characters that are used in
/// a particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to
/// determine whether a code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// The type is a transparent wrapper around the raw `u16` value; use
/// [`Script::to_icu4c_value`] and [`Script::from_icu4c_value`] to convert to
/// and from that raw value.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);

impl Script {
    /// Returns an ICU4C `UScriptCode` value.
    pub const fn to_icu4c_value(self) -> u16 {
        let Self(code) = self;
        code
    }

    /// Constructor from an ICU4C `UScriptCode` value.
    ///
    /// The value is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u16) -> Self {
        Script(value)
    }
}
753
754create_const_array! {
755#[allow(missing_docs)] // These constants don't need individual documentation.
756#[allow(non_upper_case_globals)]
757impl Script {
758 pub const Adlam: Script = Script(167);
759 pub const Ahom: Script = Script(161);
760 pub const AnatolianHieroglyphs: Script = Script(156);
761 pub const Arabic: Script = Script(2);
762 pub const Armenian: Script = Script(3);
763 pub const Avestan: Script = Script(117);
764 pub const Balinese: Script = Script(62);
765 pub const Bamum: Script = Script(130);
766 pub const BassaVah: Script = Script(134);
767 pub const Batak: Script = Script(63);
768 pub const Bengali: Script = Script(4);
769 pub const BeriaErfe: Script = Script(208);
770 pub const Bhaiksuki: Script = Script(168);
771 pub const Bopomofo: Script = Script(5);
772 pub const Brahmi: Script = Script(65);
773 pub const Braille: Script = Script(46);
774 pub const Buginese: Script = Script(55);
775 pub const Buhid: Script = Script(44);
776 pub const CanadianAboriginal: Script = Script(40);
777 pub const Carian: Script = Script(104);
778 pub const CaucasianAlbanian: Script = Script(159);
779 pub const Chakma: Script = Script(118);
780 pub const Cham: Script = Script(66);
781 pub const Cherokee: Script = Script(6);
782 pub const Chorasmian: Script = Script(189);
783 pub const Common: Script = Script(0);
784 pub const Coptic: Script = Script(7);
785 pub const Cuneiform: Script = Script(101);
786 pub const Cypriot: Script = Script(47);
787 pub const CyproMinoan: Script = Script(193);
788 pub const Cyrillic: Script = Script(8);
789 pub const Deseret: Script = Script(9);
790 pub const Devanagari: Script = Script(10);
791 pub const DivesAkuru: Script = Script(190);
792 pub const Dogra: Script = Script(178);
793 pub const Duployan: Script = Script(135);
794 pub const EgyptianHieroglyphs: Script = Script(71);
795 pub const Elbasan: Script = Script(136);
796 pub const Elymaic: Script = Script(185);
797 pub const Ethiopian: Script = Script(11);
798 pub const Garay: Script = Script(201);
799 pub const Georgian: Script = Script(12);
800 pub const Glagolitic: Script = Script(56);
801 pub const Gothic: Script = Script(13);
802 pub const Grantha: Script = Script(137);
803 pub const Greek: Script = Script(14);
804 pub const Gujarati: Script = Script(15);
805 pub const GunjalaGondi: Script = Script(179);
806 pub const Gurmukhi: Script = Script(16);
807 pub const GurungKhema: Script = Script(202);
808 pub const Han: Script = Script(17);
809 pub const Hangul: Script = Script(18);
810 pub const HanifiRohingya: Script = Script(182);
811 pub const Hanunoo: Script = Script(43);
812 pub const Hatran: Script = Script(162);
813 pub const Hebrew: Script = Script(19);
814 pub const Hiragana: Script = Script(20);
815 pub const ImperialAramaic: Script = Script(116);
816 pub const Inherited: Script = Script(1);
817 pub const InscriptionalPahlavi: Script = Script(122);
818 pub const InscriptionalParthian: Script = Script(125);
819 pub const Javanese: Script = Script(78);
820 pub const Kaithi: Script = Script(120);
821 pub const Kannada: Script = Script(21);
822 pub const Katakana: Script = Script(22);
823 pub const Kawi: Script = Script(198);
824 pub const KayahLi: Script = Script(79);
825 pub const Kharoshthi: Script = Script(57);
826 pub const KhitanSmallScript: Script = Script(191);
827 pub const Khmer: Script = Script(23);
828 pub const Khojki: Script = Script(157);
829 pub const Khudawadi: Script = Script(145);
830 pub const KiratRai: Script = Script(203);
831 pub const Lao: Script = Script(24);
832 pub const Latin: Script = Script(25);
833 pub const Lepcha: Script = Script(82);
834 pub const Limbu: Script = Script(48);
835 pub const LinearA: Script = Script(83);
836 pub const LinearB: Script = Script(49);
837 pub const Lisu: Script = Script(131);
838 pub const Lycian: Script = Script(107);
839 pub const Lydian: Script = Script(108);
840 pub const Mahajani: Script = Script(160);
841 pub const Makasar: Script = Script(180);
842 pub const Malayalam: Script = Script(26);
843 pub const Mandaic: Script = Script(84);
844 pub const Manichaean: Script = Script(121);
845 pub const Marchen: Script = Script(169);
846 pub const MasaramGondi: Script = Script(175);
847 pub const Medefaidrin: Script = Script(181);
848 pub const MeeteiMayek: Script = Script(115);
849 pub const MendeKikakui: Script = Script(140);
850 pub const MeroiticCursive: Script = Script(141);
851 pub const MeroiticHieroglyphs: Script = Script(86);
852 pub const Miao: Script = Script(92);
853 pub const Modi: Script = Script(163);
854 pub const Mongolian: Script = Script(27);
855 pub const Mro: Script = Script(149);
856 pub const Multani: Script = Script(164);
857 pub const Myanmar: Script = Script(28);
858 pub const Nabataean: Script = Script(143);
859 pub const NagMundari: Script = Script(199);
860 pub const Nandinagari: Script = Script(187);
861 pub const Nastaliq: Script = Script(200);
862 pub const Newa: Script = Script(170);
863 pub const NewTaiLue: Script = Script(59);
864 pub const Nko: Script = Script(87);
865 pub const Nushu: Script = Script(150);
866 pub const NyiakengPuachueHmong: Script = Script(186);
867 pub const Ogham: Script = Script(29);
868 pub const OlChiki: Script = Script(109);
869 pub const OldHungarian: Script = Script(76);
870 pub const OldItalic: Script = Script(30);
871 pub const OldNorthArabian: Script = Script(142);
872 pub const OldPermic: Script = Script(89);
873 pub const OldPersian: Script = Script(61);
874 pub const OldSogdian: Script = Script(184);
875 pub const OldSouthArabian: Script = Script(133);
876 pub const OldTurkic: Script = Script(88);
877 pub const OldUyghur: Script = Script(194);
878 pub const OlOnal: Script = Script(204);
879 pub const Oriya: Script = Script(31);
880 pub const Osage: Script = Script(171);
881 pub const Osmanya: Script = Script(50);
882 pub const PahawhHmong: Script = Script(75);
883 pub const Palmyrene: Script = Script(144);
884 pub const PauCinHau: Script = Script(165);
885 pub const PhagsPa: Script = Script(90);
886 pub const Phoenician: Script = Script(91);
887 pub const PsalterPahlavi: Script = Script(123);
888 pub const Rejang: Script = Script(110);
889 pub const Runic: Script = Script(32);
890 pub const Samaritan: Script = Script(126);
891 pub const Saurashtra: Script = Script(111);
892 pub const Sharada: Script = Script(151);
893 pub const Shavian: Script = Script(51);
894 pub const Siddham: Script = Script(166);
895 pub const Sidetic: Script = Script(209);
896 pub const SignWriting: Script = Script(112);
897 pub const Sinhala: Script = Script(33);
898 pub const Sogdian: Script = Script(183);
899 pub const SoraSompeng: Script = Script(152);
900 pub const Soyombo: Script = Script(176);
901 pub const Sundanese: Script = Script(113);
902 pub const Sunuwar: Script = Script(205);
903 pub const SylotiNagri: Script = Script(58);
904 pub const Syriac: Script = Script(34);
905 pub const Tagalog: Script = Script(42);
906 pub const Tagbanwa: Script = Script(45);
907 pub const TaiLe: Script = Script(52);
908 pub const TaiTham: Script = Script(106);
909 pub const TaiViet: Script = Script(127);
910 pub const TaiYo: Script = Script(210);
911 pub const Takri: Script = Script(153);
912 pub const Tamil: Script = Script(35);
913 pub const Tangsa: Script = Script(195);
914 pub const Tangut: Script = Script(154);
915 pub const Telugu: Script = Script(36);
916 pub const Thaana: Script = Script(37);
917 pub const Thai: Script = Script(38);
918 pub const Tibetan: Script = Script(39);
919 pub const Tifinagh: Script = Script(60);
920 pub const Tirhuta: Script = Script(158);
921 pub const Todhri: Script = Script(206);
922 pub const TolongSiki: Script = Script(211);
923 pub const Toto: Script = Script(196);
924 pub const TuluTigalari: Script = Script(207);
925 pub const Ugaritic: Script = Script(53);
926 pub const Unknown: Script = Script(103);
927 pub const Vai: Script = Script(99);
928 pub const Vithkuqi: Script = Script(197);
929 pub const Wancho: Script = Script(188);
930 pub const WarangCiti: Script = Script(146);
931 pub const Yezidi: Script = Script(192);
932 pub const Yi: Script = Script(41);
933 pub const ZanabazarSquare: Script = Script(177);
934}
935#[test]
936fn script_consts();
937}
938
939impl Script {
940 // Doesn't actually exist!
941 #[doc(hidden)]
942 #[allow(non_upper_case_globals)]
943 #[deprecated]
944 // Some high value that ICU4C will not use anytime soon
945 pub const Chisoi: Script = Self(60_000);
946}
947
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// Converts a `Script` property value into a locale script subtag by asking
/// [`PropertyNamesShort`](crate::PropertyNamesShort) for the matching locale
/// script. When that lookup yields nothing, the subtag `Zzzz` is returned.
#[cfg(feature = "compiled_data")]
impl From<Script> for icu_locale_core::subtags::Script {
    fn from(value: Script) -> Self {
        crate::PropertyNamesShort::new()
            .get_locale_script(value)
            // No locale script for this value: fall back to `Zzzz`.
            .unwrap_or(icu_locale_core::subtags::script!("Zzzz"))
    }
}
957
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// Converts a locale script subtag into a `Script` property value by passing
/// the subtag's string form to
/// [`PropertyParser::get_strict`](crate::PropertyParser). When the parser
/// returns nothing, [`Script::Unknown`] is used.
#[cfg(feature = "compiled_data")]
impl From<icu_locale_core::subtags::Script> for Script {
    fn from(value: icu_locale_core::subtags::Script) -> Self {
        crate::PropertyParser::new()
            .get_strict(value.as_str())
            // Subtag not recognised by the parser: report `Unknown`.
            .unwrap_or(Self::Unknown)
    }
}
967
968make_enumerated_property! {
969 name: "Script";
970 short_name: "sc";
971 ident: Script;
972 data_marker: crate::provider::PropertyEnumScriptV1;
973 singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
974 ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
975}
976
/// Enumerated property Hangul_Syllable_Type.
///
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
/// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns an ICU4C `UHangulSyllableType` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UHangulSyllableType` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1014
1015create_const_array! {
1016#[allow(non_upper_case_globals)]
1017impl HangulSyllableType {
1018 /// (`NA`) not applicable (e.g. not a Hangul code point).
1019 pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
1020 /// (`L`) a conjoining leading consonant Jamo.
1021 pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
1022 /// (`V`) a conjoining vowel Jamo.
1023 pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
1024 /// (`T`) a conjoining trailing consonant Jamo.
1025 pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
1026 /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
1027 pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
1028 /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
1029 pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
1030}
1031#[test]
1032fn hangul_syllable_type_consts();
1033}
1034
1035make_enumerated_property! {
1036 name: "Hangul_Syllable_Type";
1037 short_name: "hst";
1038 ident: HangulSyllableType;
1039 data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
1040 singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
1041 ule_ty: u8;
1042
1043}
1044
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ｱ'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1080
1081create_const_array! {
1082#[allow(missing_docs)] // These constants don't need individual documentation.
1083#[allow(non_upper_case_globals)]
1084impl EastAsianWidth {
1085 pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
1086 pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
1087 pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
1088 pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
1089 pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
1090 pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
1091}
1092#[test]
1093fn east_asian_width_consts();
1094}
1095
1096make_enumerated_property! {
1097 name: "East_Asian_Width";
1098 short_name: "ea";
1099 ident: EastAsianWidth;
1100 data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
1101 singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
1102 ule_ty: u8;
1103}
1104
/// Enumerated property Line_Break.
///
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
/// value: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); // U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Returns an ICU4C `ULineBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `ULineBreak` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1144
1145create_const_array! {
1146#[allow(missing_docs)] // These constants don't need individual documentation.
1147#[allow(non_upper_case_globals)]
1148impl LineBreak {
1149 pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1150 pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1151 pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1152 pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1153 pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1154 pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1155 pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1156 pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1157 pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1158 pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1159 pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1160 pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1161 pub const Glue: LineBreak = LineBreak(12); // name="GL"
1162 pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1163 pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1164 pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1165 pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1166 pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1167 pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1168 pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1169 pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1170 pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1171 pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1172 pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1173 pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1174 pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1175 pub const Space: LineBreak = LineBreak(26); // name="SP"
1176 pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1177 pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1178 pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1179 pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1180 pub const H2: LineBreak = LineBreak(31); // name="H2"
1181 pub const H3: LineBreak = LineBreak(32); // name="H3"
1182 pub const JL: LineBreak = LineBreak(33); // name="JL"
1183 pub const JT: LineBreak = LineBreak(34); // name="JT"
1184 pub const JV: LineBreak = LineBreak(35); // name="JV"
1185 pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1186 pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1187 pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1188 pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1189 pub const EBase: LineBreak = LineBreak(40); // name="EB"
1190 pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1191 pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1192
1193 // Added in ICU 74:
1194 pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1195 pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
1196 pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
1197 pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
1198 pub const Virama: LineBreak = LineBreak(47); // name="VI"
1199
1200 // Added in ICU 78:
1201 pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
1202}
1203#[test]
1204fn line_break_consts();
1205}
1206
1207make_enumerated_property! {
1208 name: "Line_Break";
1209 short_name: "lb";
1210 ident: LineBreak;
1211 data_marker: crate::provider::PropertyEnumLineBreakV1;
1212 singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
1213 ule_ty: u8;
1214}
1215
/// Enumerated property Grapheme_Cluster_Break.
///
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
/// summary of each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Returns an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1254
1255create_const_array! {
1256#[allow(missing_docs)] // These constants don't need individual documentation.
1257#[allow(non_upper_case_globals)]
1258impl GraphemeClusterBreak {
1259 pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1260 pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1261 pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1262 pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1263 pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1264 pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1265 pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1266 pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1267 pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1268 pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1269 pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1270 pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1271 pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1272 /// This value is obsolete and unused.
1273 pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1274 /// This value is obsolete and unused.
1275 pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1276 /// This value is obsolete and unused.
1277 pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1278 /// This value is obsolete and unused.
1279 pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1280 pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1281}
1282#[test]
1283fn gcb_consts();
1284}
1285
1286make_enumerated_property! {
1287 name: "Grapheme_Cluster_Break";
1288 short_name: "GCB";
1289 ident: GraphemeClusterBreak;
1290 data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
1291 singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
1292 ule_ty: u8;
1293}
1294
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('，'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1333
1334create_const_array! {
1335#[allow(missing_docs)] // These constants don't need individual documentation.
1336#[allow(non_upper_case_globals)]
1337impl WordBreak {
1338 pub const Other: WordBreak = WordBreak(0); // name="XX"
1339 pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1340 pub const Format: WordBreak = WordBreak(2); // name="FO"
1341 pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1342 pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1343 pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1344 pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1345 pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1346 pub const CR: WordBreak = WordBreak(8); // name="CR"
1347 pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1348 pub const LF: WordBreak = WordBreak(10); // name="LF"
1349 pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1350 pub const Newline: WordBreak = WordBreak(12); // name="NL"
1351 pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1352 pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1353 pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1354 pub const DoubleQuote: WordBreak = WordBreak(16); // name=DQ
1355 /// This value is obsolete and unused.
1356 pub const EBase: WordBreak = WordBreak(17); // name="EB"
1357 /// This value is obsolete and unused.
1358 pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1359 /// This value is obsolete and unused.
1360 pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1361 /// This value is obsolete and unused.
1362 pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1363 pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1364 pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1365}
1366#[test]
1367fn word_break_consts();
1368}
1369
1370make_enumerated_property! {
1371 name: "Word_Break";
1372 short_name: "WB";
1373 ident: WordBreak;
1374 data_marker: crate::provider::PropertyEnumWordBreakV1;
1375 singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
1376 ule_ty: u8;
1377}
1378
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('９'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1417
1418create_const_array! {
1419#[allow(missing_docs)] // These constants don't need individual documentation.
1420#[allow(non_upper_case_globals)]
1421impl SentenceBreak {
1422 pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1423 pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1424 pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1425 pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1426 pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1427 pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1428 pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1429 pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1430 pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1431 pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1432 pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1433 pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1434 pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1435 pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1436 pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1437}
1438#[test]
1439fn sentence_break_consts();
1440}
1441
1442make_enumerated_property! {
1443 name: "Sentence_Break";
1444 short_name: "SB";
1445 ident: SentenceBreak;
1446 data_marker: crate::provider::PropertyEnumSentenceBreakV1;
1447 singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
1448 ule_ty: u8;
1449}
1450
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The value is stored as-is; no range validation is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1496
1497create_const_array! {
1498// These constant names come from PropertyValueAliases.txt
1499#[allow(missing_docs)] // These constants don't need individual documentation.
1500#[allow(non_upper_case_globals)]
1501impl CanonicalCombiningClass {
1502 pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
1503 pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
1504 pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
1505 pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
1506 pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
1507 pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
1508 pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
1509 pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
1510 pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
1511 pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
1512 pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
1513 pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
1514 pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
1515 pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
1516 pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
1517 pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
1518 pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
1519 pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
1520 pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
1521 pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
1522 pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
1523 pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
1524 pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
1525 pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
1526 pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
1527 pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
1528 pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
1529 pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
1530 pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
1531 pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
1532 pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
1533 pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
1534 pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
1535 pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
1536 pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
1537 pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
1538 pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
1539 pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
1540 pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
1541 pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
1542 pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
1543 pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
1544 pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
1545 pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
1546 pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
1547 pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
1548 pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
1549 pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
1550 pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
1551 pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
1552 pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
1553 pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
1554 pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
1555 pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
1556 pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
1557 pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
1558 pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
1559 pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
1560}
1561#[test]
1562fn ccc_consts();
1563}
1564
// Associates `CanonicalCombiningClass` with the Unicode property
// `Canonical_Combining_Class` (short alias `ccc`), its data marker, its baked
// singleton, and a `u8` ULE representation.
// NOTE(review): `make_enumerated_property!` is defined outside this part of the file;
// presumably it implements `EnumeratedProperty` for the type — confirm there.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1573
/// The `Indic_Conjunct_Break` property.
///
/// Specified in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// The wrapped `u8` is the number exchanged with ICU4C through
/// [`Self::to_icu4c_value`] and [`Self::from_icu4c_value`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
///     IndicConjunctBreak::None
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
///     IndicConjunctBreak::Linker
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
///     IndicConjunctBreak::Consonant
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
///     IndicConjunctBreak::Extend
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicConjunctBreak(pub(crate) u8);

impl IndicConjunctBreak {
    /// Converts this value into the matching ICU4C `UIndicConjunctBreak` number.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UIndicConjunctBreak` number.
    ///
    /// The number is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicConjunctBreak(value)
    }
}
1616
// Named values of `IndicConjunctBreak`. As documented on `create_const_array!`, the macro
// re-emits this `impl` and also builds an `ALL_VALUES` list of `(name, value)` pairs;
// the trailing `#[test] fn` item names the test generated alongside it.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
#[test]
fn indic_conjunct_break_consts();
}
1629
// Associates `IndicConjunctBreak` with the Unicode property `Indic_Conjunct_Break`
// (short alias `InCB`), its data marker, its baked singleton, and a `u8` ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this part of the file;
// presumably it implements `EnumeratedProperty` for the type — confirm there.
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8;
}
1638
/// The `Indic_Syllabic_Category` property.
///
/// Specified in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// The wrapped `u8` is the number exchanged with ICU4C through
/// [`Self::to_icu4c_value`] and [`Self::from_icu4c_value`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
///     IndicSyllabicCategory::Other
/// );
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
///     IndicSyllabicCategory::Bindu
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);

impl IndicSyllabicCategory {
    /// Converts this value into the matching ICU4C `UIndicSyllabicCategory` number.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UIndicSyllabicCategory` number.
    ///
    /// The number is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicSyllabicCategory(value)
    }
}
1673
// Named values of `IndicSyllabicCategory`. As documented on `create_const_array!`, the
// macro re-emits this `impl` and also builds an `ALL_VALUES` list of `(name, value)`
// pairs; the trailing `#[test] fn` item names the test generated alongside it.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
#[test]
fn indic_syllabic_category_consts();
}
1719
// Associates `IndicSyllabicCategory` with the Unicode property `Indic_Syllabic_Category`
// (short alias `InSC`), its data marker, its baked singleton, and a `u8` ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this part of the file;
// presumably it implements `EnumeratedProperty` for the type — confirm there.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1728
/// The enumerated `Joining_Type` property.
///
/// Each property value is summarized in Section 9.2, Arabic Cursive Joining, of
/// The Unicode Standard.
///
/// The wrapped `u8` is the number exchanged with ICU4C through
/// [`Self::to_icu4c_value`] and [`Self::from_icu4c_value`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::JoiningType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
///     JoiningType::DualJoining
/// ); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
///     JoiningType::LeftJoining
/// ); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);

impl JoiningType {
    /// Converts this value into the matching ICU4C `UJoiningType` number.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UJoiningType` number.
    ///
    /// The number is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        JoiningType(value)
    }
}
1764
// Named values of `JoiningType`; the trailing `name="…"` comments record each value's
// short alias. As documented on `create_const_array!`, the macro re-emits this `impl`
// and also builds an `ALL_VALUES` list of `(name, value)` pairs; the trailing
// `#[test] fn` item names the test generated alongside it.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
#[test]
fn joining_type_consts();
}
1779
// Associates `JoiningType` with the Unicode property `Joining_Type` (short alias `jt`),
// its data marker, its baked singleton, and a `u8` ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this part of the file;
// presumably it implements `EnumeratedProperty` for the type — confirm there.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1788
/// The `Vertical_Orientation` property.
///
/// Specified in UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// The wrapped `u8` is the number exchanged with ICU4C through
/// [`Self::to_icu4c_value`] and [`Self::from_icu4c_value`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
///     VerticalOrientation::Rotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
///     VerticalOrientation::Upright
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
///     VerticalOrientation::TransformedRotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
///     VerticalOrientation::TransformedUpright
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct VerticalOrientation(pub(crate) u8);

impl VerticalOrientation {
    /// Converts this value into the matching ICU4C `UVerticalOrientation` number.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UVerticalOrientation` number.
    ///
    /// The number is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        VerticalOrientation(value)
    }
}
1832
// Named values of `VerticalOrientation`; the trailing `name="…"` comments record each
// value's short alias. As documented on `create_const_array!`, the macro re-emits this
// `impl` and also builds an `ALL_VALUES` list of `(name, value)` pairs; the trailing
// `#[test] fn` item names the test generated alongside it.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
#[test]
fn vertical_orientation_consts();
}
1845
// Associates `VerticalOrientation` with the Unicode property `Vertical_Orientation`
// (short alias `vo`), its data marker, its baked singleton, and a `u8` ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this part of the file;
// presumably it implements `EnumeratedProperty` for the type — confirm there.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1854
1855pub use crate::code_point_set::BinaryProperty;
1856
// Declares one binary Unicode property.
//
// Input, in this fixed order: the long property name, the short alias, the identifier of
// the marker type to create, the data marker type, the identifier of the baked singleton,
// and finally one or more attributes (normally the `///` docs) for the marker type.
//
// Output: a public, non-exhaustive unit struct deriving `Debug`, sealed via
// `crate::private::Sealed`, with a `BinaryProperty` impl exposing the names as bytes.
macro_rules! make_binary_property {
    (
        name: $long_name:literal;
        short_name: $abbr:literal;
        ident: $marker:ident;
        data_marker: $data_marker_ty:ty;
        singleton: $baked_singleton:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbr.as_bytes();
            type DataMarker = $data_marker_ty;
            // The baked data only exists when the crate is built with `compiled_data`.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked_singleton;
        }
    };
}
1883
// Expands to the `AsciiHexDigit` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `ASCII_Hex_Digit`, short name `AHex`); the doc comment below becomes
// the struct's documentation.
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1906
// Expands to the `Alnum` marker struct with its `Sealed` and `BinaryProperty` impls;
// long and short name are both `alnum`.
make_binary_property! {
    name: "alnum";
    short_name: "alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
}
1917
// Expands to the `Alphabetic` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Alphabetic`, short name `Alpha`).
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1941
// Expands to the `BidiControl` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Bidi_Control`, short name `Bidi_C`).
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1964
// Expands to the `BidiMirrored` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Bidi_Mirrored`, short name `Bidi_M`).
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1988
// Expands to the `Blank` marker struct with its `Sealed` and `BinaryProperty` impls;
// long and short name are both `blank`.
make_binary_property! {
    name: "blank";
    short_name: "blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.

}
1998
// Expands to the `Cased` marker struct with its `Sealed` and `BinaryProperty` impls;
// long and short name are both `Cased`.
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
2020
// Expands to the `CaseIgnorable` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Case_Ignorable`, short name `CI`).
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```

}
2042
// Expands to the `FullCompositionExclusion` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Full_Composition_Exclusion`, short name `Comp_Ex`).
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>

}
2054
// Expands to the `ChangesWhenCasefolded` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_Casefolded`, short name `CWCF`).
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
2076
// Expands to the `ChangesWhenCasemapped` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_Casemapped`, short name `CWCM`).
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.

}
2086
// Expands to the `ChangesWhenNfkcCasefolded` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_NFKC_Casefolded`, short name `CWKCF`).
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
2108
// Expands to the `ChangesWhenLowercased` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_Lowercased`, short name `CWL`).
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
2130
// Expands to the `ChangesWhenTitlecased` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_Titlecased`, short name `CWT`).
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

}
2152
// Expands to the `ChangesWhenUppercased` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Changes_When_Uppercased`, short name `CWU`).
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
2174
// Expands to the `Dash` marker struct with its `Sealed` and `BinaryProperty` impls;
// long and short name are both `Dash`.
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

}
2198
// Expands to the `Deprecated` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Deprecated`, short name `Dep`).
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

}
2223
// Expands to the `DefaultIgnorableCodePoint` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Default_Ignorable_Code_Point`, short name `DI`).
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

}
2250
// Expands to the `Diacritic` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Diacritic`, short name `Dia`).
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

}
2272
// Expands to the `EmojiModifierBase` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Emoji_Modifier_Base`, short name `EBase`).
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

}
2294
// Expands to the `EmojiComponent` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Emoji_Component`, short name `EComp`).
make_binary_property! {
    name: "Emoji_Component";
    short_name: "EComp";
    ident: EmojiComponent;
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiComponent;
    ///
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));
    /// assert!(!emoji_component.contains('T'));
    /// ```

}
2319
// Expands to the `EmojiModifier` marker struct with its `Sealed` and `BinaryProperty`
// impls (long name `Emoji_Modifier`, short name `EMod`).
make_binary_property! {
    name: "Emoji_Modifier";
    short_name: "EMod";
    ident: EmojiModifier;
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
    /// Characters that are emoji modifiers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifier;
    ///
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
    ///
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
    /// ```

}
2341
// Expands to the `Emoji` marker struct with its `Sealed` and `BinaryProperty` impls;
// long and short name are both `Emoji`.
make_binary_property! {
    name: "Emoji";
    short_name: "Emoji";
    ident: Emoji;
    data_marker: crate::provider::PropertyBinaryEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
    /// Characters that are emoji.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Emoji;
    ///
    /// let emoji = CodePointSetData::new::<Emoji>();
    ///
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```

}
2363
// Expands to the `EmojiPresentation` marker struct with its `Sealed` and
// `BinaryProperty` impls (long name `Emoji_Presentation`, short name `EPres`).
make_binary_property! {
    name: "Emoji_Presentation";
    short_name: "EPres";
    ident: EmojiPresentation;
    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
    /// Characters that have emoji presentation by default.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiPresentation;
    ///
    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
    ///
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
    /// ```

}
2385
make_binary_property! {
    name: "Extender";
    short_name: "Ext";
    ident: Extender;
    data_marker: crate::provider::PropertyBinaryExtenderV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
    /// Characters whose principal function is to extend the value of a preceding alphabetic
    /// character or to extend the shape of adjacent characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Extender;
    ///
    /// let extender = CodePointSetData::new::<Extender>();
    ///
    /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK
    /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
    /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT
    /// ```
}
2409
make_binary_property! {
    name: "Extended_Pictographic";
    short_name: "ExtPict";
    ident: ExtendedPictographic;
    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
    /// emoji characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ExtendedPictographic;
    ///
    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
    ///
    /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
    /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
    /// ```
}
2432
make_binary_property! {
    name: "graph";
    short_name: "graph";
    ident: Graph;
    data_marker: crate::provider::PropertyBinaryGraphV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
    /// Visible characters.
    ///
    /// Compare [`Print`], which covers visible characters and whitespace.
    ///
    /// This is defined for POSIX compatibility.
}
2444
make_binary_property! {
    name: "Grapheme_Base";
    short_name: "Gr_Base";
    ident: GraphemeBase;
    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
    /// Property used together with the definition of Standard Korean Syllable Block to define
    /// "Grapheme base".
    ///
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeBase;
    ///
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
    ///
    /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```
}
2470
make_binary_property! {
    name: "Grapheme_Extend";
    short_name: "Gr_Ext";
    ident: GraphemeExtend;
    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
    /// Property used to define "Grapheme extender".
    ///
    /// See D59 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeExtend;
    ///
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
    ///
    /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```
}
2496
make_binary_property! {
    name: "Grapheme_Link";
    short_name: "Gr_Link";
    ident: GraphemeLink;
    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
    /// Deprecated property.
    ///
    /// Formerly proposed for programmatic determination of grapheme
    /// cluster boundaries.
}
2508
make_binary_property! {
    name: "Hex_Digit";
    short_name: "Hex";
    ident: HexDigit;
    data_marker: crate::provider::PropertyBinaryHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
    /// compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::HexDigit;
    ///
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
    ///
    /// assert!(hex_digit.contains('0'));
    /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(hex_digit.contains('f'));
    /// // The fullwidth forms are written as escapes so that they cannot be confused with
    /// // (or silently normalized to) the ASCII letters checked above.
    /// assert!(hex_digit.contains('\u{FF46}')); // FULLWIDTH LATIN SMALL LETTER F
    /// assert!(hex_digit.contains('\u{FF26}')); // FULLWIDTH LATIN CAPITAL LETTER F
    /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
2534
make_binary_property! {
    name: "Hyphen";
    short_name: "Hyphen";
    ident: Hyphen;
    data_marker: crate::provider::PropertyBinaryHyphenV1;
    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
    /// Deprecated property.
    ///
    /// Dashes which are used to mark connections between pieces of
    /// words, plus the Katakana middle dot.
}
2546
make_binary_property! {
    name: "ID_Compat_Math_Continue";
    short_name: "ID_Compat_Math_Continue";
    ident: IdCompatMathContinue;
    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
    /// The `ID_Compat_Math_Continue` property.
    ///
    /// Used by the mathematical compatibility notation profile for identifiers; see
    /// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/) for details.
}
2555
make_binary_property! {
    name: "ID_Compat_Math_Start";
    short_name: "ID_Compat_Math_Start";
    ident: IdCompatMathStart;
    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
    /// The `ID_Compat_Math_Start` property.
    ///
    /// Used by the mathematical compatibility notation profile for identifiers; see
    /// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/) for details.
}
2564
make_binary_property! {
    name: "ID_Continue";
    short_name: "IDC";
    ident: IdContinue;
    data_marker: crate::provider::PropertyBinaryIdContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidContinue`] instead.
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdContinue;
    ///
    /// let id_continue = CodePointSetData::new::<IdContinue>();
    ///
    /// assert!(id_continue.contains('x'));
    /// assert!(id_continue.contains('1'));
    /// assert!(id_continue.contains('_'));
    /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2594
make_binary_property! {
    name: "Ideographic";
    short_name: "Ideo";
    ident: Ideographic;
    data_marker: crate::provider::PropertyBinaryIdeographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
    /// ideographs, or related siniform ideographs.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Ideographic;
    ///
    /// let ideographic = CodePointSetData::new::<Ideographic>();
    ///
    /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
    /// ```
}
2616
make_binary_property! {
    name: "ID_Start";
    short_name: "IDS";
    ident: IdStart;
    data_marker: crate::provider::PropertyBinaryIdStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidStart`] instead.
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdStart;
    ///
    /// let id_start = CodePointSetData::new::<IdStart>();
    ///
    /// assert!(id_start.contains('x'));
    /// assert!(!id_start.contains('1'));
    /// assert!(!id_start.contains('_'));
    /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2645
make_binary_property! {
    name: "IDS_Binary_Operator";
    short_name: "IDSB";
    ident: IdsBinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
    /// Characters used in Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsBinaryOperator;
    ///
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
    ///
    /// assert!(ids_binary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_binary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```
}
2666
make_binary_property! {
    name: "IDS_Trinary_Operator";
    short_name: "IDST";
    ident: IdsTrinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
    /// Characters used in Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsTrinaryOperator;
    ///
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
    ///
    /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
    /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}')); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_trinary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
    /// ```
}
2690
make_binary_property! {
    name: "IDS_Unary_Operator";
    short_name: "IDSU";
    ident: IdsUnaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
    /// The `IDS_Unary_Operator` property.
    ///
    /// Like [`IdsBinaryOperator`] and [`IdsTrinaryOperator`], this covers characters used in
    /// Ideographic Description Sequences.
}
2699
make_binary_property! {
    name: "Join_Control";
    short_name: "Join_C";
    ident: JoinControl;
    data_marker: crate::provider::PropertyBinaryJoinControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
    /// Format control characters which have specific functions for control of cursive joining
    /// and ligation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::JoinControl;
    ///
    /// let join_control = CodePointSetData::new::<JoinControl>();
    ///
    /// assert!(join_control.contains('\u{200C}')); // ZERO WIDTH NON-JOINER
    /// assert!(join_control.contains('\u{200D}')); // ZERO WIDTH JOINER
    /// assert!(!join_control.contains('\u{200E}')); // LEFT-TO-RIGHT MARK
    /// ```
}
2722
make_binary_property! {
    name: "Logical_Order_Exception";
    short_name: "LOE";
    ident: LogicalOrderException;
    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::LogicalOrderException;
    ///
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
    ///
    /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
    /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
    /// ```
}
2743
make_binary_property! {
    name: "Lowercase";
    short_name: "Lower";
    ident: Lowercase;
    data_marker: crate::provider::PropertyBinaryLowercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
    /// Lowercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Lowercase;
    ///
    /// let lowercase = CodePointSetData::new::<Lowercase>();
    ///
    /// assert!(lowercase.contains('a'));
    /// assert!(!lowercase.contains('A'));
    /// ```
}
2764
make_binary_property! {
    name: "Math";
    short_name: "Math";
    ident: Math;
    data_marker: crate::provider::PropertyBinaryMathV1;
    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
    /// Characters used in mathematical notation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Math;
    ///
    /// let math = CodePointSetData::new::<Math>();
    ///
    /// assert!(math.contains('='));
    /// assert!(math.contains('+'));
    /// assert!(!math.contains('-'));
    /// assert!(math.contains('−')); // U+2212 MINUS SIGN
    /// assert!(!math.contains('/'));
    /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
    /// ```
}
2789
make_binary_property! {
    name: "Modifier_Combining_Mark";
    short_name: "MCM";
    ident: ModifierCombiningMark;
    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
    /// The `Modifier_Combining_Mark` property.
    ///
    /// Used by the Arabic Mark Transient Reordering Algorithm (AMTRA); see
    /// [Unicode Standard Annex #53](https://www.unicode.org/reports/tr53/) for details.
}
2798
make_binary_property! {
    name: "Noncharacter_Code_Point";
    short_name: "NChar";
    ident: NoncharacterCodePoint;
    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
    /// Code points permanently reserved for internal use.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::NoncharacterCodePoint;
    ///
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
    ///
    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
    /// assert!(!noncharacter_code_point.contains('\u{10000}')); // LINEAR B SYLLABLE B008 A
    /// ```
}
2820
make_binary_property! {
    name: "NFC_Inert";
    short_name: "nfcinert";
    ident: NfcInert;
    data_marker: crate::provider::PropertyBinaryNfcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
}
2829
make_binary_property! {
    name: "NFD_Inert";
    short_name: "nfdinert";
    ident: NfdInert;
    data_marker: crate::provider::PropertyBinaryNfdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
}
2838
make_binary_property! {
    name: "NFKC_Inert";
    short_name: "nfkcinert";
    ident: NfkcInert;
    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
}
2847
make_binary_property! {
    name: "NFKD_Inert";
    short_name: "nfkdinert";
    ident: NfkdInert;
    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
}
2856
make_binary_property! {
    name: "Pattern_Syntax";
    short_name: "Pat_Syn";
    ident: PatternSyntax;
    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
    /// Characters used as syntax in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternSyntax;
    ///
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
    ///
    /// assert!(pattern_syntax.contains('{'));
    /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
    /// assert!(!pattern_syntax.contains('0'));
    /// ```
}
2882
make_binary_property! {
    name: "Pattern_White_Space";
    short_name: "Pat_WS";
    ident: PatternWhiteSpace;
    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
    /// Characters used as whitespace in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternWhiteSpace;
    ///
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
    ///
    /// assert!(pattern_white_space.contains(' '));
    /// assert!(pattern_white_space.contains('\u{2029}')); // PARAGRAPH SEPARATOR
    /// assert!(pattern_white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(!pattern_white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// ```
}
2909
make_binary_property! {
    name: "Prepended_Concatenation_Mark";
    short_name: "PCM";
    ident: PrependedConcatenationMark;
    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
    /// A small class of visible format controls, which precede and then span a sequence of
    /// other characters, usually digits.
}
2919
make_binary_property! {
    name: "print";
    short_name: "print";
    ident: Print;
    data_marker: crate::provider::PropertyBinaryPrintV1;
    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
    /// Printable characters (visible characters and whitespace).
    ///
    /// This is defined for POSIX compatibility.
}
2930
make_binary_property! {
    name: "Quotation_Mark";
    short_name: "QMark";
    ident: QuotationMark;
    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
    /// Punctuation characters that function as quotation marks.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::QuotationMark;
    ///
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
    ///
    /// assert!(quotation_mark.contains('\''));
    /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
    /// assert!(!quotation_mark.contains('<'));
    /// ```
}
2952
make_binary_property! {
    name: "Radical";
    short_name: "Radical";
    ident: Radical;
    data_marker: crate::provider::PropertyBinaryRadicalV1;
    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
    /// Characters used in the definition of Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Radical;
    ///
    /// let radical = CodePointSetData::new::<Radical>();
    ///
    /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
    /// // Written as an escape: a literal compatibility ideograph is easily replaced by its
    /// // canonically equivalent unified ideograph when the file passes through normalization.
    /// assert!(!radical.contains('\u{F95E}')); // CJK COMPATIBILITY IDEOGRAPH-F95E
    /// ```
}
2973
make_binary_property! {
    name: "Regional_Indicator";
    short_name: "RI";
    ident: RegionalIndicator;
    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::RegionalIndicator;
    ///
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
    ///
    /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
    /// assert!(!regional_indicator.contains('T'));
    /// ```
}
2995
make_binary_property! {
    name: "Soft_Dotted";
    short_name: "SD";
    ident: SoftDotted;
    data_marker: crate::provider::PropertyBinarySoftDottedV1;
    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
    /// Characters with a "soft dot", like i or j.
    ///
    /// An accent placed on these characters causes the dot to disappear.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SoftDotted;
    ///
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
    ///
    /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
    /// ```
}
3019
make_binary_property! {
    name: "Segment_Starter";
    short_name: "segstart";
    ident: SegmentStarter;
    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
    /// Characters that are starters in terms of Unicode normalization and combining character
    /// sequences.
}
3029
make_binary_property! {
    name: "Case_Sensitive";
    short_name: "Sensitive";
    ident: CaseSensitive;
    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
    /// Characters that are either the source of a case mapping or in the target of a case
    /// mapping.
}
3039
make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
    /// Punctuation characters that generally mark the end of sentences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(','));
    /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
3063
make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
    /// Punctuation characters that generally mark the end of textual units.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(','));
    /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
3087
make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
    /// ```
}
3109
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Uppercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));
    /// assert!(!uppercase.contains('u'));
    /// ```
}
3130
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that are Variation Selectors.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}')); // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}')); // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}')); // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}')); // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}')); // VARIATION SELECTOR-256
    /// ```
}
3154
make_binary_property! {
    name: "White_Space";
    short_name: "WSpace";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));
    /// assert!(white_space.contains('\u{000A}')); // NEW LINE
    /// assert!(white_space.contains('\u{00A0}')); // NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}')); // ZERO WIDTH SPACE
    /// ```
}
3178
make_binary_property! {
    name: "xdigit";
    short_name: "xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.
}
3189
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3217
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3246
3247pub use crate::emoji::EmojiSet;
3248
/// Declares a unit marker type for an emoji set and implements [`EmojiSet`] for it.
///
/// An invocation supplies, in order: the long and short property names, the identifier of
/// the generated type, its data marker type, the identifier of the baked singleton, and
/// one or more attributes (normally doc comments) that are attached to the generated type.
macro_rules! make_emoji_set {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $ident:ident;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $ident;

        impl crate::private::Sealed for $ident {}

        impl EmojiSet for $ident {
            const NAME: &'static [u8] = $name.as_bytes();
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();

            type DataMarker = $data_marker;

            // Only emitted when the `compiled_data` feature is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$singleton;
        }
    };
}
3275
3276make_emoji_set! {
3277 name: "Basic_Emoji";
3278 short_name: "Basic_Emoji";
3279 ident: BasicEmoji;
3280 data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
3281 singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
3282 /// Characters and character sequences intended for general-purpose, independent, direct input.
3283 ///
3284 /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
3285 /// details.
3286 ///
3287 /// # Example
3288 ///
3289 /// ```
3290 /// use icu::properties::EmojiSetData;
3291 /// use icu::properties::props::BasicEmoji;
3292 ///
3293 /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
3294 ///
3295 /// assert!(!basic_emoji.contains('\u{0020}'));
3296 /// assert!(!basic_emoji.contains('\n'));
3297 /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
3298 /// assert!(basic_emoji.contains_str("\u{1F983}"));
3299 /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
3300 /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
3301 /// ```
3302}
3303
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use crate::provider::Baked;
    use std::collections::BTreeMap;

    /// Asserts that the numeric values named in `lookup` are exactly the values in `consts`.
    ///
    /// Any value present on only one side is listed in the panic message, tagged with the
    /// side it came from (`Consts` or `Data`).
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Values known to the data, keyed by numeric value; matched entries are removed below.
        let mut data = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect::<BTreeMap<_, _>>();

        let names = crate::PropertyNamesLong::<T>::new();

        // First the constants that the data does not know about, in declaration order...
        let mut diff = Vec::new();
        for value in consts {
            let key = u16::from(*value) as usize;
            let name = names.get(*value).unwrap_or("<unknown>").to_string();
            if data.remove(&key).is_none() {
                diff.push((key, (name, "Consts")));
            }
        }
        // ...then whatever is left in the data without a matching constant.
        diff.extend(data);

        let fmt_diff: String = diff
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    // East_Asian_Width
    #[test]
    fn test_ea() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    // Canonical_Combining_Class
    #[test]
    fn test_ccc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    // Joining_Type
    #[test]
    fn test_jt() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    // Indic_Syllabic_Category
    #[test]
    fn test_insc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    // Sentence_Break
    #[test]
    fn test_sb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    // Word_Break
    #[test]
    fn test_wb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    // Bidi_Class
    #[test]
    fn test_bc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    // Hangul_Syllable_Type
    #[test]
    fn test_hst() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    // Vertical_Orientation
    #[test]
    fn test_vo() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}