icu_properties/
props.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! This module defines all available properties.
//!
//! Properties are either empty marker types that implement [`BinaryProperty`], or
//! enumerations[^1] that implement [`EnumeratedProperty`].
//!
//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
//!
//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
//! can be constructed.
//!
//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Declares the associated constants of an "open enum" newtype and generates the
/// boilerplate that needs the complete list of those constants.
///
/// See [`test_enumerated_property_completeness`] for usage.
/// Example input:
/// ```ignore
/// create_const_array! {
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// #[test]
/// fn east_asian_width_consts();
/// }
/// ```
/// The expansion consists of:
/// - the `impl` block as written, extended with
///   `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
///   listing every declared constant in declaration order;
/// - with the `datagen` feature, a `databake::Bake` impl that bakes a declared value as
///   its named constant and any other value as a `from_icu4c_value(..)` call;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the struct to `u16`;
/// - a `#[test]` function with the given name asserting that each constant's identifier
///   equals its long property value name after the spelling adjustments listed in the test.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
        #[test]
        fn $consts_test:ident();
    ) => {
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    $(
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
                    )*
                    // Values without a named constant are baked through their raw integer.
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
                }
            }
        }


        impl From<$enum_ty> for u16  {
            fn from(other: $enum_ty) -> Self {
                other.0 as u16
            }
        }

        #[test]
        fn $consts_test() {
            $(
                assert_eq!(
                    crate::names::PropertyNamesLong::<$enum_ty>::new().get($enum_ty::$i).unwrap()
                        // Rust identifiers use camel case
                        .replace('_', "")
                        // We use Ethiopian
                        .replace("Ethiopic", "Ethiopian")
                        // Nastaliq is missing a long name?
                        .replace("Aran", "Nastaliq")
                        // We spell these out
                        .replace("LVSyllable", "LeadingVowelSyllable")
                        .replace("LVTSyllable", "LeadingVowelTrailingSyllable"),
                    stringify!($i)
                );
            )*
        }
    }
}
98
99pub use crate::code_point_map::EnumeratedProperty;
100
/// Hooks a property value type up as an [`EnumeratedProperty`].
///
/// Arguments, in this fixed order:
/// - `name`: string literal whose bytes become `EnumeratedProperty::NAME`
/// - `short_name`: string literal whose bytes become `EnumeratedProperty::SHORT_NAME`
/// - `ident`: path of the value type receiving the impls
/// - `data_marker`: type assigned to `EnumeratedProperty::DataMarker`
/// - `singleton`: identifier of the `crate::provider::Baked` constant used as
///   `EnumeratedProperty::SINGLETON` (only compiled with the `compiled_data` feature)
/// - `ule_ty` (optional): when present, `zerovec::ule::AsULE` is also implemented for the
///   type by delegating to its first field, with this type as the `ULE`
///
/// The macro additionally implements `crate::private::Sealed` for the type.
macro_rules! make_enumerated_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $prop:path;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(ule_ty: $ule:ty;)?
    ) => {
        impl crate::private::Sealed for $prop {}

        impl EnumeratedProperty for $prop {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
        }

        $(
            impl zerovec::ule::AsULE for $prop {
                type ULE = $ule;

                fn to_unaligned(self) -> Self::ULE {
                    // The newtype has no unaligned form of its own; use the field's.
                    let Self(inner) = self;
                    inner.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    let inner = zerovec::ule::AsULE::from_unaligned(unaligned);
                    Self(inner)
                }
            }
        )?
    };
}
135
/// Enumerated property Bidi_Class.
///
/// Bidi_Class values are the character categories required by the Unicode
/// Bidirectional Algorithm.
/// The property values are listed under
/// [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values);
/// for more information, see
/// [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);
161
162impl BidiClass {
163    /// Returns an ICU4C `UBidiClass` value.
164    pub const fn to_icu4c_value(self) -> u8 {
165        self.0
166    }
167    /// Constructor from an ICU4C `UBidiClass` value.
168    pub const fn from_icu4c_value(value: u8) -> Self {
169        Self(value)
170    }
171}
172
173create_const_array! {
174#[allow(non_upper_case_globals)]
175impl BidiClass {
176    /// (`L`) any strong left-to-right character
177    pub const LeftToRight: BidiClass = BidiClass(0);
178    /// (`R`) any strong right-to-left (non-Arabic-type) character
179    pub const RightToLeft: BidiClass = BidiClass(1);
180    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
181    pub const EuropeanNumber: BidiClass = BidiClass(2);
182    /// (`ES`) plus and minus signs
183    pub const EuropeanSeparator: BidiClass = BidiClass(3);
184    /// (`ET`) a terminator in a numeric format context, includes currency signs
185    pub const EuropeanTerminator: BidiClass = BidiClass(4);
186    /// (`AN`) any Arabic-Indic digit
187    pub const ArabicNumber: BidiClass = BidiClass(5);
188    /// (`CS`) commas, colons, and slashes
189    pub const CommonSeparator: BidiClass = BidiClass(6);
190    /// (`B`) various newline characters
191    pub const ParagraphSeparator: BidiClass = BidiClass(7);
192    /// (`S`) various segment-related control codes
193    pub const SegmentSeparator: BidiClass = BidiClass(8);
194    /// (`WS`) spaces
195    pub const WhiteSpace: BidiClass = BidiClass(9);
196    /// (`ON`) most other symbols and punctuation marks
197    pub const OtherNeutral: BidiClass = BidiClass(10);
198    /// (`LRE`) U+202A: the LR embedding control
199    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
200    /// (`LRO`) U+202D: the LR override control
201    pub const LeftToRightOverride: BidiClass = BidiClass(12);
202    /// (`AL`) any strong right-to-left (Arabic-type) character
203    pub const ArabicLetter: BidiClass = BidiClass(13);
204    /// (`RLE`) U+202B: the RL embedding control
205    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
206    /// (`RLO`) U+202E: the RL override control
207    pub const RightToLeftOverride: BidiClass = BidiClass(15);
208    /// (`PDF`) U+202C: terminates an embedding or override control
209    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
210    /// (`NSM`) any nonspacing mark
211    pub const NonspacingMark: BidiClass = BidiClass(17);
212    /// (`BN`) most format characters, control codes, or noncharacters
213    pub const BoundaryNeutral: BidiClass = BidiClass(18);
214    /// (`FSI`) U+2068: the first strong isolate control
215    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
216    /// (`LRI`) U+2066: the LR isolate control
217    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
218    /// (`RLI`) U+2067: the RL isolate control
219    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
220    /// (`PDI`) U+2069: terminates an isolate control
221    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
222}
223#[test]
224fn bidi_props_consts();
225}
226
// Registers `BidiClass` as the enumerated property "Bidi_Class" (short name "bc").
// Because `ule_ty` is supplied, the macro also emits an `AsULE` impl whose ULE type is `u8`.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
235
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
    #[repr(u8)]
    pub enum GeneralCategory {
        // The explicit discriminants cover 0..=29 without gaps, but two groups below are
        // declared out of numeric order.
        // NOTE(review): the numbers presumably mirror ICU4C's `UCharCategory` — confirm
        // before renumbering or reordering any variant.

        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        // Declaration order here is Mn, Mc, Me, while the values are 6, 8, 7.
        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        // `Pi` and `Pf` carry the two highest values (28, 29) even though they are
        // declared before `Po` (23) and the symbol categories (24..=27).
        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
340
341pub use gc::GeneralCategory;
342
343impl GeneralCategory {
344    /// All possible values of this enum
345    pub const ALL_VALUES: &'static [GeneralCategory] = &[
346        GeneralCategory::Unassigned,
347        GeneralCategory::UppercaseLetter,
348        GeneralCategory::LowercaseLetter,
349        GeneralCategory::TitlecaseLetter,
350        GeneralCategory::ModifierLetter,
351        GeneralCategory::OtherLetter,
352        GeneralCategory::NonspacingMark,
353        GeneralCategory::SpacingMark,
354        GeneralCategory::EnclosingMark,
355        GeneralCategory::DecimalNumber,
356        GeneralCategory::LetterNumber,
357        GeneralCategory::OtherNumber,
358        GeneralCategory::SpaceSeparator,
359        GeneralCategory::LineSeparator,
360        GeneralCategory::ParagraphSeparator,
361        GeneralCategory::Control,
362        GeneralCategory::Format,
363        GeneralCategory::PrivateUse,
364        GeneralCategory::Surrogate,
365        GeneralCategory::DashPunctuation,
366        GeneralCategory::OpenPunctuation,
367        GeneralCategory::ClosePunctuation,
368        GeneralCategory::ConnectorPunctuation,
369        GeneralCategory::InitialPunctuation,
370        GeneralCategory::FinalPunctuation,
371        GeneralCategory::OtherPunctuation,
372        GeneralCategory::MathSymbol,
373        GeneralCategory::CurrencySymbol,
374        GeneralCategory::ModifierSymbol,
375        GeneralCategory::OtherSymbol,
376    ];
377}
378
/// Checks that the `Debug` name of every [`GeneralCategory`] variant equals its
/// long property value name with the underscores removed.
#[test]
fn gc_variants() {
    GeneralCategory::ALL_VALUES.iter().copied().for_each(|category| {
        let long_name_as_ident = crate::names::PropertyNamesLong::<GeneralCategory>::new()
            .get(category)
            .unwrap()
            // Rust identifiers use camel case
            .replace('_', "");
        assert_eq!(long_name_as_ident, format!("{category:?}"));
    });
}
392
/// Error value for `impl TryFrom<u8> for GeneralCategory`, returned when the
/// integer is out of bounds.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
397
398impl TryFrom<u8> for GeneralCategory {
399    type Error = GeneralCategoryOutOfBoundsError;
400    /// Construct this [`GeneralCategory`] from an integer, returning
401    /// an error if it is out of bounds
402    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
403        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
404    }
405}
406
// Registers `GeneralCategory` as the enumerated property "General_Category" (short name "gc").
// No `ule_ty` is passed, so the macro emits no `AsULE` impl here.
// NOTE(review): presumably `#[zerovec::make_ule(GeneralCategoryULE)]` on the enum provides it — confirm.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
414
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The value is a bit mask with one bit per [`GeneralCategory`]; the masks
/// correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);
434
// NOTE(review): presumably marks this type as allowed to implement the crate's sealed
// traits — confirm against `crate::private`.
impl crate::private::Sealed for GeneralCategoryGroup {}
436
437use GeneralCategory as GC;
438use GeneralCategoryGroup as GCG;
439
440#[allow(non_upper_case_globals)]
441impl GeneralCategoryGroup {
442    /// (`Lu`) An uppercase letter
443    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
444    /// (`Ll`) A lowercase letter
445    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
446    /// (`Lt`) A digraphic letter, with first part uppercase
447    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
448    /// (`Lm`) A modifier letter
449    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
450    /// (`Lo`) Other letters, including syllables and ideographs
451    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
452    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
453    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
454        | (1 << (GC::LowercaseLetter as u32))
455        | (1 << (GC::TitlecaseLetter as u32)));
456    /// (`L`) The union of all letter categories
457    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
458        | (1 << (GC::LowercaseLetter as u32))
459        | (1 << (GC::TitlecaseLetter as u32))
460        | (1 << (GC::ModifierLetter as u32))
461        | (1 << (GC::OtherLetter as u32)));
462
463    /// (`Mn`) A nonspacing combining mark (zero advance width)
464    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
465    /// (`Mc`) A spacing combining mark (positive advance width)
466    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
467    /// (`Me`) An enclosing combining mark
468    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
469    /// (`M`) The union of all mark categories
470    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
471        | (1 << (GC::EnclosingMark as u32))
472        | (1 << (GC::SpacingMark as u32)));
473
474    /// (`Nd`) A decimal digit
475    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
476    /// (`Nl`) A letterlike numeric character
477    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
478    /// (`No`) A numeric character of other type
479    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
480    /// (`N`) The union of all number categories
481    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
482        | (1 << (GC::LetterNumber as u32))
483        | (1 << (GC::OtherNumber as u32)));
484
485    /// (`Zs`) A space character (of various non-zero widths)
486    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
487    /// (`Zl`) U+2028 LINE SEPARATOR only
488    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
489    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
490    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
491    /// (`Z`) The union of all separator categories
492    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
493        | (1 << (GC::LineSeparator as u32))
494        | (1 << (GC::ParagraphSeparator as u32)));
495
496    /// (`Cc`) A C0 or C1 control code
497    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
498    /// (`Cf`) A format control character
499    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
500    /// (`Co`) A private-use character
501    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
502    /// (`Cs`) A surrogate code point
503    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
504    /// (`Cn`) A reserved unassigned code point or a noncharacter
505    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
506    /// (`C`) The union of all control code, reserved, and unassigned categories
507    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
508        | (1 << (GC::Format as u32))
509        | (1 << (GC::PrivateUse as u32))
510        | (1 << (GC::Surrogate as u32))
511        | (1 << (GC::Unassigned as u32)));
512
513    /// (`Pd`) A dash or hyphen punctuation mark
514    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
515    /// (`Ps`) An opening punctuation mark (of a pair)
516    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
517    /// (`Pe`) A closing punctuation mark (of a pair)
518    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
519    /// (`Pc`) A connecting punctuation mark, like a tie
520    pub const ConnectorPunctuation: GeneralCategoryGroup =
521        GCG(1 << (GC::ConnectorPunctuation as u32));
522    /// (`Pi`) An initial quotation mark
523    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
524    /// (`Pf`) A final quotation mark
525    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
526    /// (`Po`) A punctuation mark of other type
527    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
528    /// (`P`) The union of all punctuation categories
529    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
530        | (1 << (GC::OpenPunctuation as u32))
531        | (1 << (GC::ClosePunctuation as u32))
532        | (1 << (GC::ConnectorPunctuation as u32))
533        | (1 << (GC::OtherPunctuation as u32))
534        | (1 << (GC::InitialPunctuation as u32))
535        | (1 << (GC::FinalPunctuation as u32)));
536
537    /// (`Sm`) A symbol of mathematical use
538    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
539    /// (`Sc`) A currency sign
540    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
541    /// (`Sk`) A non-letterlike modifier symbol
542    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
543    /// (`So`) A symbol of other type
544    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
545    /// (`S`) The union of all symbol categories
546    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
547        | (1 << (GC::CurrencySymbol as u32))
548        | (1 << (GC::ModifierSymbol as u32))
549        | (1 << (GC::OtherSymbol as u32)));
550
551    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
552
553    /// Return whether the code point belongs in the provided multi-value category.
554    ///
555    /// ```
556    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
557    /// use icu::properties::CodePointMapData;
558    ///
559    /// let gc = CodePointMapData::<GeneralCategory>::new();
560    ///
561    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
562    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
563    ///
564    /// // U+0B1E ORIYA LETTER NYA
565    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
566    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
567    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
568    ///
569    /// // U+0301 COMBINING ACUTE ACCENT
570    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
571    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
572    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
573    ///
574    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
575    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
576    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
577    ///
578    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
579    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
580    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
581    ///
582    /// // U+2713 CHECK MARK
583    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
584    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
585    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
586    ///
587    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
588    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
589    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
590    ///
591    /// // U+E007F CANCEL TAG
592    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
593    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
594    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
595    /// ```
596    pub const fn contains(self, val: GeneralCategory) -> bool {
597        0 != (1 << (val as u32)) & self.0
598    }
599
600    /// Produce a GeneralCategoryGroup that is the inverse of this one
601    ///
602    /// # Example
603    ///
604    /// ```rust
605    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
606    ///
607    /// let letter = GeneralCategoryGroup::Letter;
608    /// let not_letter = letter.complement();
609    ///
610    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
611    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
612    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
613    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
614    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
615    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
616    /// ```
617    pub const fn complement(self) -> Self {
618        // Mask off things not in Self::ALL to guarantee the mask
619        // values stay in-range
620        GeneralCategoryGroup(!self.0 & Self::ALL)
621    }
622
623    /// Return the group representing all GeneralCategory values
624    ///
625    /// # Example
626    ///
627    /// ```rust
628    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
629    ///
630    /// let all = GeneralCategoryGroup::all();
631    ///
632    /// assert!(all.contains(GeneralCategory::MathSymbol));
633    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
634    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
635    /// ```
636    pub const fn all() -> Self {
637        Self(Self::ALL)
638    }
639
640    /// Return the empty group
641    ///
642    /// # Example
643    ///
644    /// ```rust
645    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
646    ///
647    /// let empty = GeneralCategoryGroup::empty();
648    ///
649    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
650    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
651    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
652    /// ```
653    pub const fn empty() -> Self {
654        Self(0)
655    }
656
657    /// Take the union of two groups
658    ///
659    /// # Example
660    ///
661    /// ```rust
662    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
663    ///
664    /// let letter = GeneralCategoryGroup::Letter;
665    /// let symbol = GeneralCategoryGroup::Symbol;
666    /// let union = letter.union(symbol);
667    ///
668    /// assert!(union.contains(GeneralCategory::MathSymbol));
669    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
670    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
671    /// ```
672    pub const fn union(self, other: Self) -> Self {
673        Self(self.0 | other.0)
674    }
675
676    /// Take the intersection of two groups
677    ///
678    /// # Example
679    ///
680    /// ```rust
681    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
682    ///
683    /// let letter = GeneralCategoryGroup::Letter;
684    /// let lu = GeneralCategoryGroup::UppercaseLetter;
685    /// let intersection = letter.intersection(lu);
686    ///
687    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
688    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
689    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
690    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
691    /// ```
692    pub const fn intersection(self, other: Self) -> Self {
693        Self(self.0 & other.0)
694    }
695}
696
697impl From<GeneralCategory> for GeneralCategoryGroup {
698    fn from(subcategory: GeneralCategory) -> Self {
699        GeneralCategoryGroup(1 << (subcategory as u32))
700    }
701}
702impl From<u32> for GeneralCategoryGroup {
703    fn from(mask: u32) -> Self {
704        // Mask off things not in Self::ALL to guarantee the mask
705        // values stay in-range
706        GeneralCategoryGroup(mask & Self::ALL)
707    }
708}
709impl From<GeneralCategoryGroup> for u32 {
710    fn from(group: GeneralCategoryGroup) -> Self {
711        group.0
712    }
713}
714
/// Enumerated property Script.
///
/// The same type serves both the Script and the Script_Extensions Unicode properties.
/// Every character has exactly one Script, but a character used in a particular
/// subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. To determine whether a
/// code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);
742
743impl Script {
744    /// Returns an ICU4C `UScriptCode` value.
745    pub const fn to_icu4c_value(self) -> u16 {
746        self.0
747    }
748    /// Constructor from an ICU4C `UScriptCode` value.
749    pub const fn from_icu4c_value(value: u16) -> Self {
750        Self(value)
751    }
752}
753
// Associated constants for the `Script` values known to this crate. Each constant wraps
// the numeric ICU4C `UScriptCode`; entries are listed alphabetically by name, not by value.
//
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants. `script_consts` names the test the macro declares.
// NOTE(review): the generated test body is not visible in this excerpt.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl Script {
    pub const Adlam: Script = Script(167);
    pub const Ahom: Script = Script(161);
    pub const AnatolianHieroglyphs: Script = Script(156);
    pub const Arabic: Script = Script(2);
    pub const Armenian: Script = Script(3);
    pub const Avestan: Script = Script(117);
    pub const Balinese: Script = Script(62);
    pub const Bamum: Script = Script(130);
    pub const BassaVah: Script = Script(134);
    pub const Batak: Script = Script(63);
    pub const Bengali: Script = Script(4);
    pub const BeriaErfe: Script = Script(208);
    pub const Bhaiksuki: Script = Script(168);
    pub const Bopomofo: Script = Script(5);
    pub const Brahmi: Script = Script(65);
    pub const Braille: Script = Script(46);
    pub const Buginese: Script = Script(55);
    pub const Buhid: Script = Script(44);
    pub const CanadianAboriginal: Script = Script(40);
    pub const Carian: Script = Script(104);
    pub const CaucasianAlbanian: Script = Script(159);
    pub const Chakma: Script = Script(118);
    pub const Cham: Script = Script(66);
    pub const Cherokee: Script = Script(6);
    pub const Chorasmian: Script = Script(189);
    pub const Common: Script = Script(0);
    pub const Coptic: Script = Script(7);
    pub const Cuneiform: Script = Script(101);
    pub const Cypriot: Script = Script(47);
    pub const CyproMinoan: Script = Script(193);
    pub const Cyrillic: Script = Script(8);
    pub const Deseret: Script = Script(9);
    pub const Devanagari: Script = Script(10);
    pub const DivesAkuru: Script = Script(190);
    pub const Dogra: Script = Script(178);
    pub const Duployan: Script = Script(135);
    pub const EgyptianHieroglyphs: Script = Script(71);
    pub const Elbasan: Script = Script(136);
    pub const Elymaic: Script = Script(185);
    pub const Ethiopian: Script = Script(11);
    pub const Garay: Script = Script(201);
    pub const Georgian: Script = Script(12);
    pub const Glagolitic: Script = Script(56);
    pub const Gothic: Script = Script(13);
    pub const Grantha: Script = Script(137);
    pub const Greek: Script = Script(14);
    pub const Gujarati: Script = Script(15);
    pub const GunjalaGondi: Script = Script(179);
    pub const Gurmukhi: Script = Script(16);
    pub const GurungKhema: Script = Script(202);
    pub const Han: Script = Script(17);
    pub const Hangul: Script = Script(18);
    pub const HanifiRohingya: Script = Script(182);
    pub const Hanunoo: Script = Script(43);
    pub const Hatran: Script = Script(162);
    pub const Hebrew: Script = Script(19);
    pub const Hiragana: Script = Script(20);
    pub const ImperialAramaic: Script = Script(116);
    pub const Inherited: Script = Script(1);
    pub const InscriptionalPahlavi: Script = Script(122);
    pub const InscriptionalParthian: Script = Script(125);
    pub const Javanese: Script = Script(78);
    pub const Kaithi: Script = Script(120);
    pub const Kannada: Script = Script(21);
    pub const Katakana: Script = Script(22);
    pub const Kawi: Script = Script(198);
    pub const KayahLi: Script = Script(79);
    pub const Kharoshthi: Script = Script(57);
    pub const KhitanSmallScript: Script = Script(191);
    pub const Khmer: Script = Script(23);
    pub const Khojki: Script = Script(157);
    pub const Khudawadi: Script = Script(145);
    pub const KiratRai: Script = Script(203);
    pub const Lao: Script = Script(24);
    pub const Latin: Script = Script(25);
    pub const Lepcha: Script = Script(82);
    pub const Limbu: Script = Script(48);
    pub const LinearA: Script = Script(83);
    pub const LinearB: Script = Script(49);
    pub const Lisu: Script = Script(131);
    pub const Lycian: Script = Script(107);
    pub const Lydian: Script = Script(108);
    pub const Mahajani: Script = Script(160);
    pub const Makasar: Script = Script(180);
    pub const Malayalam: Script = Script(26);
    pub const Mandaic: Script = Script(84);
    pub const Manichaean: Script = Script(121);
    pub const Marchen: Script = Script(169);
    pub const MasaramGondi: Script = Script(175);
    pub const Medefaidrin: Script = Script(181);
    pub const MeeteiMayek: Script = Script(115);
    pub const MendeKikakui: Script = Script(140);
    pub const MeroiticCursive: Script = Script(141);
    pub const MeroiticHieroglyphs: Script = Script(86);
    pub const Miao: Script = Script(92);
    pub const Modi: Script = Script(163);
    pub const Mongolian: Script = Script(27);
    pub const Mro: Script = Script(149);
    pub const Multani: Script = Script(164);
    pub const Myanmar: Script = Script(28);
    pub const Nabataean: Script = Script(143);
    pub const NagMundari: Script = Script(199);
    pub const Nandinagari: Script = Script(187);
    pub const Nastaliq: Script = Script(200);
    pub const Newa: Script = Script(170);
    pub const NewTaiLue: Script = Script(59);
    pub const Nko: Script = Script(87);
    pub const Nushu: Script = Script(150);
    pub const NyiakengPuachueHmong: Script = Script(186);
    pub const Ogham: Script = Script(29);
    pub const OlChiki: Script = Script(109);
    pub const OldHungarian: Script = Script(76);
    pub const OldItalic: Script = Script(30);
    pub const OldNorthArabian: Script = Script(142);
    pub const OldPermic: Script = Script(89);
    pub const OldPersian: Script = Script(61);
    pub const OldSogdian: Script = Script(184);
    pub const OldSouthArabian: Script = Script(133);
    pub const OldTurkic: Script = Script(88);
    pub const OldUyghur: Script = Script(194);
    pub const OlOnal: Script = Script(204);
    pub const Oriya: Script = Script(31);
    pub const Osage: Script = Script(171);
    pub const Osmanya: Script = Script(50);
    pub const PahawhHmong: Script = Script(75);
    pub const Palmyrene: Script = Script(144);
    pub const PauCinHau: Script = Script(165);
    pub const PhagsPa: Script = Script(90);
    pub const Phoenician: Script = Script(91);
    pub const PsalterPahlavi: Script = Script(123);
    pub const Rejang: Script = Script(110);
    pub const Runic: Script = Script(32);
    pub const Samaritan: Script = Script(126);
    pub const Saurashtra: Script = Script(111);
    pub const Sharada: Script = Script(151);
    pub const Shavian: Script = Script(51);
    pub const Siddham: Script = Script(166);
    pub const Sidetic: Script = Script(209);
    pub const SignWriting: Script = Script(112);
    pub const Sinhala: Script = Script(33);
    pub const Sogdian: Script = Script(183);
    pub const SoraSompeng: Script = Script(152);
    pub const Soyombo: Script = Script(176);
    pub const Sundanese: Script = Script(113);
    pub const Sunuwar: Script = Script(205);
    pub const SylotiNagri: Script = Script(58);
    pub const Syriac: Script = Script(34);
    pub const Tagalog: Script = Script(42);
    pub const Tagbanwa: Script = Script(45);
    pub const TaiLe: Script = Script(52);
    pub const TaiTham: Script = Script(106);
    pub const TaiViet: Script = Script(127);
    pub const TaiYo: Script = Script(210);
    pub const Takri: Script = Script(153);
    pub const Tamil: Script = Script(35);
    pub const Tangsa: Script = Script(195);
    pub const Tangut: Script = Script(154);
    pub const Telugu: Script = Script(36);
    pub const Thaana: Script = Script(37);
    pub const Thai: Script = Script(38);
    pub const Tibetan: Script = Script(39);
    pub const Tifinagh: Script = Script(60);
    pub const Tirhuta: Script = Script(158);
    pub const Todhri: Script = Script(206);
    pub const TolongSiki: Script = Script(211);
    pub const Toto: Script = Script(196);
    pub const TuluTigalari: Script = Script(207);
    pub const Ugaritic: Script = Script(53);
    pub const Unknown: Script = Script(103);
    pub const Vai: Script = Script(99);
    pub const Vithkuqi: Script = Script(197);
    pub const Wancho: Script = Script(188);
    pub const WarangCiti: Script = Script(146);
    pub const Yezidi: Script = Script(192);
    pub const Yi: Script = Script(41);
    pub const ZanabazarSquare: Script = Script(177);
}
#[test]
fn script_consts();
}
938
// Declared in its own `impl`, outside the `create_const_array!` invocation, so this
// placeholder is not among the constants passed to that macro.
impl Script {
    // Doesn't actually exist! This is not a real script value: the constant is hidden
    // from the generated documentation and marked deprecated.
    #[doc(hidden)]
    #[allow(non_upper_case_globals)]
    #[deprecated]
    // Some high value that ICU4C will not use anytime soon
    pub const Chisoi: Script = Self(60_000);
}
947
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
#[cfg(feature = "compiled_data")]
impl From<Script> for icu_locale_core::subtags::Script {
    /// Looks up the locale script subtag for `value` through `PropertyNamesShort`,
    /// yielding the `Zzzz` subtag when the lookup returns nothing.
    fn from(value: Script) -> Self {
        let fallback = icu_locale_core::subtags::script!("Zzzz");
        crate::PropertyNamesShort::new()
            .get_locale_script(value)
            .unwrap_or(fallback)
    }
}
957
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
#[cfg(feature = "compiled_data")]
impl From<icu_locale_core::subtags::Script> for Script {
    /// Resolves the subtag's string form with a strict `PropertyParser` lookup;
    /// subtags without a match become [`Script::Unknown`].
    fn from(value: icu_locale_core::subtags::Script) -> Self {
        let subtag = value.as_str();
        crate::PropertyParser::new()
            .get_strict(subtag)
            .unwrap_or(Self::Unknown)
    }
}
967
// Registers `Script` as the enumerated property "Script" (short name "sc").
// `Script` wraps a `u16`, so its ULE type is the `AsULE` form of `u16`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::PropertyEnumScriptV1;
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
}
976
/// Enumerated property Hangul_Syllable_Type.
///
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
/// arbitrary Hangul syllables. This property classifies Hangul code points along those lines.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// The wrapped `u8` is the ICU4C `UHangulSyllableType` value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);
1003
1004impl HangulSyllableType {
1005    /// Returns an ICU4C `UHangulSyllableType` value.
1006    pub const fn to_icu4c_value(self) -> u8 {
1007        self.0
1008    }
1009    /// Constructor from an ICU4C `UHangulSyllableType` value.
1010    pub const fn from_icu4c_value(value: u8) -> Self {
1011        Self(value)
1012    }
1013}
1014
// Associated constants for the Hangul_Syllable_Type values, numbered 0 through 5.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `hangul_syllable_type_consts` names the test it declares.
create_const_array! {
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
#[test]
fn hangul_syllable_type_consts();
}
1034
// Registers `HangulSyllableType` as the enumerated property "Hangul_Syllable_Type"
// (short name "hst"), stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;

}
1044
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// The wrapped `u8` is the ICU4C `UEastAsianWidth` value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ｱ'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);
1069
1070impl EastAsianWidth {
1071    /// Returns an ICU4C `UEastAsianWidth` value.
1072    pub const fn to_icu4c_value(self) -> u8 {
1073        self.0
1074    }
1075    /// Constructor from an ICU4C `UEastAsianWidth` value.
1076    pub const fn from_icu4c_value(value: u8) -> Self {
1077        Self(value)
1078    }
1079}
1080
// Associated constants for the East_Asian_Width values; the trailing `name="…"` comments
// record each value's short property-value name.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `east_asian_width_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W"
}
#[test]
fn east_asian_width_consts();
}
1095
// Registers `EastAsianWidth` as the enumerated property "East_Asian_Width"
// (short name "ea"), stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
}
1104
/// Enumerated property Line_Break.
///
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
/// value: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); // U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);
1133
1134impl LineBreak {
1135    /// Returns an ICU4C `ULineBreak` value.
1136    pub const fn to_icu4c_value(self) -> u8 {
1137        self.0
1138    }
1139    /// Constructor from an ICU4C `ULineBreak` value.
1140    pub const fn from_icu4c_value(value: u8) -> Self {
1141        Self(value)
1142    }
1143}
1144
// Associated constants for the Line_Break values, in numeric order; the trailing
// `name="…"` comments record each value's short property-value name.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `line_break_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl LineBreak {
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
    pub const Space: LineBreak = LineBreak(26); // name="SP"
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
    pub const H2: LineBreak = LineBreak(31); // name="H2"
    pub const H3: LineBreak = LineBreak(32); // name="H3"
    pub const JL: LineBreak = LineBreak(33); // name="JL"
    pub const JT: LineBreak = LineBreak(34); // name="JT"
    pub const JV: LineBreak = LineBreak(35); // name="JV"
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"

    // Added in ICU 74:
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
    pub const Virama: LineBreak = LineBreak(47); // name="VI"

    // Added in ICU 78:
    pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
}
#[test]
fn line_break_consts();
}
1206
// Registers `LineBreak` as the enumerated property "Line_Break" (short name "lb"),
// stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Line_Break";
    short_name: "lb";
    ident: LineBreak;
    data_marker: crate::provider::PropertyEnumLineBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
    ule_ty: u8;
}
1215
/// Enumerated property Grapheme_Cluster_Break.
///
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
/// summary of each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// The wrapped `u8` is the ICU4C `UGraphemeClusterBreak` value.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);
1243
1244impl GraphemeClusterBreak {
1245    /// Returns an ICU4C `UGraphemeClusterBreak` value.
1246    pub const fn to_icu4c_value(self) -> u8 {
1247        self.0
1248    }
1249    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
1250    pub const fn from_icu4c_value(value: u8) -> Self {
1251        Self(value)
1252    }
1253}
1254
// Associated constants for the Grapheme_Cluster_Break values, in numeric order; the
// trailing `name="…"` comments record each value's short property-value name.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `gcb_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl GraphemeClusterBreak {
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
    /// This value is obsolete and unused.
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
}
#[test]
fn gcb_consts();
}
1285
// Registers `GraphemeClusterBreak` as the enumerated property "Grapheme_Cluster_Break"
// (short name "GCB"), stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Grapheme_Cluster_Break";
    short_name: "GCB";
    ident: GraphemeClusterBreak;
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
    ule_ty: u8;
}
1294
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// The wrapped `u8` is the ICU4C `UWordBreak` value.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('，'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);
1322
1323impl WordBreak {
1324    /// Returns an ICU4C `UWordBreak` value.
1325    pub const fn to_icu4c_value(self) -> u8 {
1326        self.0
1327    }
1328    /// Constructor from an ICU4C `UWordBreak` value.
1329    pub const fn from_icu4c_value(value: u8) -> Self {
1330        Self(value)
1331    }
1332}
1333
// Associated constants for the Word_Break values, in numeric order; the trailing
// `name="…"` comments record each value's short property-value name.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `word_break_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl WordBreak {
    pub const Other: WordBreak = WordBreak(0); // name="XX"
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
    pub const Format: WordBreak = WordBreak(2); // name="FO"
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
    pub const CR: WordBreak = WordBreak(8); // name="CR"
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
    pub const LF: WordBreak = WordBreak(10); // name="LF"
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
    /// This value is obsolete and unused.
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
}
#[test]
fn word_break_consts();
}
1369
// Registers `WordBreak` as the enumerated property "Word_Break" (short name "WB"),
// stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Word_Break";
    short_name: "WB";
    ident: WordBreak;
    data_marker: crate::provider::PropertyEnumWordBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
    ule_ty: u8;
}
1378
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// The wrapped `u8` is the ICU4C `USentenceBreak` value.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('９'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);
1406
1407impl SentenceBreak {
1408    /// Returns an ICU4C `USentenceBreak` value.
1409    pub const fn to_icu4c_value(self) -> u8 {
1410        self.0
1411    }
1412    /// Constructor from an ICU4C `USentenceBreak` value.
1413    pub const fn from_icu4c_value(value: u8) -> Self {
1414        Self(value)
1415    }
1416}
1417
// Associated constants for the Sentence_Break values, in numeric order; the trailing
// `name="…"` comments record each value's short property-value name.
// `create_const_array!` re-emits this `impl` and, per its documentation, also produces a
// name/value list of the constants; `sentence_break_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl SentenceBreak {
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
}
#[test]
fn sentence_break_consts();
}
1441
// Registers `SentenceBreak` as the enumerated property "Sentence_Break" (short name "SB"),
// stored as a plain `u8`.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; presumably it
// generates the property trait impls from the data marker and singleton below — confirm.
make_enumerated_property! {
    name: "Sentence_Break";
    short_name: "SB";
    ident: SentenceBreak;
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
    ule_ty: u8;
}
1450
/// Property Canonical_Combining_Class.
///
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the
/// preferred API to look up the Canonical_Combining_Class property by scalar value.
///
/// The wrapped `u8` is the ICU4C `UCanonicalCombiningClass` value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);
1485
1486impl CanonicalCombiningClass {
1487    /// Returns an ICU4C `UCanonicalCombiningClass` value.
1488    pub const fn to_icu4c_value(self) -> u8 {
1489        self.0
1490    }
1491    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
1492    pub const fn from_icu4c_value(value: u8) -> Self {
1493        Self(value)
1494    }
1495}
1496
// Named values of the open enum `CanonicalCombiningClass`.
// The numeric values are sparse (e.g. nothing between 1 and 6, or 36 and 84), and
// the trailing `// name="..."` notes are presumably each value's short alias.
// Per its own docs, `create_const_array!` also gathers the constants into an
// `ALL_VALUES` array; `ccc_consts` names the test it declares.
create_const_array! {
// These constant names come from PropertyValueAliases.txt
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
#[test]
fn ccc_consts();
}
1564
// Feeds `CanonicalCombiningClass` to `make_enumerated_property!` under the long name
// "Canonical_Combining_Class" and short name "ccc", together with its data marker,
// baked singleton identifier and `u8` ULE type.
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro definition.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1573
/// Property Indic_Conjunct_Break.
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
///     IndicConjunctBreak::None
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
///     IndicConjunctBreak::Linker
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
///     IndicConjunctBreak::Consonant
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
///     IndicConjunctBreak::Extend
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicConjunctBreak(pub(crate) u8);

impl IndicConjunctBreak {
    /// Yields the wrapped `u8`, i.e. the ICU4C `UIndicConjunctBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UIndicConjunctBreak` value without any validation.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicConjunctBreak(value)
    }
}
1616
// Named values of the open enum `IndicConjunctBreak`.
// Per its own docs, `create_const_array!` also gathers the constants into an
// `ALL_VALUES` array; `indic_conjunct_break_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
#[test]
fn indic_conjunct_break_consts();
}
1629
// Feeds `IndicConjunctBreak` to `make_enumerated_property!` under the long name
// "Indic_Conjunct_Break" and short name "InCB", together with its data marker,
// baked singleton identifier and `u8` ULE type.
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro definition.
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8;
}
1638
/// Property Indic_Syllabic_Category.
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
///     IndicSyllabicCategory::Other
/// );
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
///     IndicSyllabicCategory::Bindu
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);

impl IndicSyllabicCategory {
    /// Yields the wrapped `u8`, i.e. the ICU4C `UIndicSyllabicCategory` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UIndicSyllabicCategory` value without any validation.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicSyllabicCategory(value)
    }
}
1673
// Named values of the open enum `IndicSyllabicCategory`.
// Values run contiguously from 0 to 36; note that `ReorderingKiller` (36) sits at the
// end rather than in alphabetical position.
// Per its own docs, `create_const_array!` also gathers the constants into an
// `ALL_VALUES` array; `indic_syllabic_category_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
#[test]
fn indic_syllabic_category_consts();
}
1719
// Feeds `IndicSyllabicCategory` to `make_enumerated_property!` under the long name
// "Indic_Syllabic_Category" and short name "InSC", together with its data marker,
// baked singleton identifier and `u8` ULE type.
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro definition.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1728
/// Enumerated property Joining_Type.
///
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
/// each property value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::JoiningType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
///     JoiningType::DualJoining
/// ); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
///     JoiningType::LeftJoining
/// ); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);

impl JoiningType {
    /// Yields the wrapped `u8`, i.e. the ICU4C `UJoiningType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UJoiningType` value without any validation.
    pub const fn from_icu4c_value(value: u8) -> Self {
        JoiningType(value)
    }
}
1764
// Named values of the open enum `JoiningType`.
// The trailing `// name="..."` notes are presumably each value's short alias.
// Per its own docs, `create_const_array!` also gathers the constants into an
// `ALL_VALUES` array; `joining_type_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
#[test]
fn joining_type_consts();
}
1779
// Feeds `JoiningType` to `make_enumerated_property!` under the long name
// "Joining_Type" and short name "jt", together with its data marker, baked
// singleton identifier and `u8` ULE type.
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro definition.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1788
/// Property Vertical_Orientation.
///
/// See UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// # Example
///
/// ```
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
///     VerticalOrientation::Rotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
///     VerticalOrientation::Upright
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
///     VerticalOrientation::TransformedRotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
///     VerticalOrientation::TransformedUpright
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct VerticalOrientation(pub(crate) u8);

impl VerticalOrientation {
    /// Yields the wrapped `u8`, i.e. the ICU4C `UVerticalOrientation` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UVerticalOrientation` value without any validation.
    pub const fn from_icu4c_value(value: u8) -> Self {
        VerticalOrientation(value)
    }
}
1832
// Named values of the open enum `VerticalOrientation`.
// The trailing `// name="..."` notes are presumably each value's short alias.
// Per its own docs, `create_const_array!` also gathers the constants into an
// `ALL_VALUES` array; `vertical_orientation_consts` names the test it declares.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
#[test]
fn vertical_orientation_consts();
}
1845
// Feeds `VerticalOrientation` to `make_enumerated_property!` under the long name
// "Vertical_Orientation" and short name "vo", together with its data marker,
// baked singleton identifier and `u8` ULE type.
// NOTE(review): the macro is defined outside this chunk; presumably it implements
// `EnumeratedProperty` for the type — confirm against the macro definition.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1854
1855pub use crate::code_point_set::BinaryProperty;
1856
/// Declares a unit marker struct for one binary property and implements
/// [`BinaryProperty`] (plus the crate-private `Sealed` trait) for it.
///
/// The fields must be given in the order shown, each terminated by `;`, followed
/// by at least one attribute (normally the doc comment), which is forwarded onto
/// the generated struct.
macro_rules! make_binary_property {
    (
        name: $long_name:literal;
        short_name: $abbrev:literal;
        ident: $marker:ident;
        data_marker: $data_ty:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            type DataMarker = $data_ty;
            // The baked singleton only exists when compiled data is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbrev.as_bytes();
        }
    };
}
1883
// Declares the `AsciiHexDigit` marker type for the binary property "ASCII_Hex_Digit"
// (short name "AHex"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1906
// Declares the `Alnum` marker type for the binary property "alnum"; the long and
// short names are the same lowercase string. The doc comment below is attached to
// the generated struct.
make_binary_property! {
    name: "alnum";
    short_name: "alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
}
1917
// Declares the `Alphabetic` marker type for the binary property "Alphabetic"
// (short name "Alpha"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1941
// Declares the `BidiControl` marker type for the binary property "Bidi_Control"
// (short name "Bidi_C"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1964
// Declares the `BidiMirrored` marker type for the binary property "Bidi_Mirrored"
// (short name "Bidi_M"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1988
// Declares the `Blank` marker type for the binary property "blank"; the long and
// short names are the same lowercase string. The doc comment below is attached to
// the generated struct.
make_binary_property! {
    name: "blank";
    short_name: "blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.

}
1998
// Declares the `Cased` marker type for the binary property "Cased" (the short name
// is also "Cased"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
2020
// Declares the `CaseIgnorable` marker type for the binary property "Case_Ignorable"
// (short name "CI"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```

}
2042
// Declares the `FullCompositionExclusion` marker type for the binary property
// "Full_Composition_Exclusion" (short name "Comp_Ex"); the doc comment below is
// attached to the generated struct.
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>

}
2054
// Declares the `ChangesWhenCasefolded` marker type for the binary property
// "Changes_When_Casefolded" (short name "CWCF"); the doc comment below is attached
// to the generated struct.
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
2076
// Declares the `ChangesWhenCasemapped` marker type for the binary property
// "Changes_When_Casemapped" (short name "CWCM"); the doc comment below is attached
// to the generated struct.
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.

}
2086
// Declares the `ChangesWhenNfkcCasefolded` marker type for the binary property
// "Changes_When_NFKC_Casefolded" (short name "CWKCF"); the doc comment below is
// attached to the generated struct.
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
2108
// Declares the `ChangesWhenLowercased` marker type for the binary property
// "Changes_When_Lowercased" (short name "CWL"); the doc comment below is attached
// to the generated struct.
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
2130
// Declares the `ChangesWhenTitlecased` marker type for the binary property
// "Changes_When_Titlecased" (short name "CWT"); the doc comment below is attached
// to the generated struct.
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

}
2152
// Declares the `ChangesWhenUppercased` marker type for the binary property
// "Changes_When_Uppercased" (short name "CWU"); the doc comment below is attached
// to the generated struct.
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
2174
// Declares the `Dash` marker type for the binary property "Dash" (the short name is
// also "Dash"); the doc comment below is attached to the generated struct.
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

}
2198
2199make_binary_property! {
2200    name: "Deprecated";
2201    short_name: "Dep";
2202    ident: Deprecated;
2203    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
2204    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
2205    /// Characters deprecated by the Unicode Standard.
2206    ///
2207    /// No characters will ever be removed from the standard, but the
2208    /// usage of deprecated characters is strongly discouraged.
2209    ///
2210    /// # Example
2211    ///
2212    /// ```
2213    /// use icu::properties::CodePointSetData;
2214    /// use icu::properties::props::Deprecated;
2215    ///
2216    /// let deprecated = CodePointSetData::new::<Deprecated>();
2217    ///
2218    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
2219    /// assert!(!deprecated.contains('A'));  // U+0041 LATIN CAPITAL LETTER A
2220    /// ```
2221
2222}
2223
2224make_binary_property! {
2225    name: "Default_Ignorable_Code_Point";
2226    short_name: "DI";
2227    ident: DefaultIgnorableCodePoint;
2228    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
2229    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
2230    /// For programmatic determination of default ignorable code points.
2231    ///
2232    /// New characters that
2233    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
2234    /// ranges, permitting programs to correctly handle the default rendering of such
2235    /// characters when not otherwise supported.
2236    ///
2237    /// # Example
2238    ///
2239    /// ```
2240    /// use icu::properties::CodePointSetData;
2241    /// use icu::properties::props::DefaultIgnorableCodePoint;
2242    ///
2243    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
2244    ///
2245    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // U+180B MONGOLIAN FREE VARIATION SELECTOR ONE
2246    /// assert!(!default_ignorable_code_point.contains('E'));  // U+0045 LATIN CAPITAL LETTER E
2247    /// ```
2248
2249}
2250
2251make_binary_property! {
2252    name: "Diacritic";
2253    short_name: "Dia";
2254    ident: Diacritic;
2255    data_marker: crate::provider::PropertyBinaryDiacriticV1;
2256    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
2257    /// Characters that linguistically modify the meaning of another character to which they apply.
2258    ///
2259    /// # Example
2260    ///
2261    /// ```
2262    /// use icu::properties::CodePointSetData;
2263    /// use icu::properties::props::Diacritic;
2264    ///
2265    /// let diacritic = CodePointSetData::new::<Diacritic>();
2266    ///
2267    /// assert!(diacritic.contains('\u{05B3}'));  // U+05B3 HEBREW POINT HATAF QAMATS
2268    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
2269    /// ```
2270
2271}
2272
2273make_binary_property! {
2274    name: "Emoji_Modifier_Base";
2275    short_name: "EBase";
2276    ident: EmojiModifierBase;
2277    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
2278    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
2279    /// Characters that can serve as a base for emoji modifiers (see [`EmojiModifier`]).
2280    ///
2281    /// # Example
2282    ///
2283    /// ```
2284    /// use icu::properties::CodePointSetData;
2285    /// use icu::properties::props::EmojiModifierBase;
2286    ///
2287    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
2288    ///
2289    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
2290    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
2291    /// ```
2292
2293}
2294
2295make_binary_property! {
2296    name: "Emoji_Component";
2297    short_name: "EComp";
2298    ident: EmojiComponent;
2299    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2300    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2301    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2302    /// separate choices, such as base characters for emoji keycaps.
2303    ///
2304    /// # Example
2305    ///
2306    /// ```
2307    /// use icu::properties::CodePointSetData;
2308    /// use icu::properties::props::EmojiComponent;
2309    ///
2310    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2311    ///
2312    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2313    /// assert!(emoji_component.contains('\u{20E3}'));  // U+20E3 COMBINING ENCLOSING KEYCAP
2314    /// assert!(emoji_component.contains('7'));  // U+0037 DIGIT SEVEN (keycap base)
2315    /// assert!(!emoji_component.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2316    /// ```
2317
2318}
2319
2320make_binary_property! {
2321    name: "Emoji_Modifier";
2322    short_name: "EMod";
2323    ident: EmojiModifier;
2324    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2325    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2326    /// Characters that are emoji modifiers (see [`EmojiModifierBase`] for the characters they apply to).
2327    ///
2328    /// # Example
2329    ///
2330    /// ```
2331    /// use icu::properties::CodePointSetData;
2332    /// use icu::properties::props::EmojiModifier;
2333    ///
2334    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2335    ///
2336    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // U+1F3FD EMOJI MODIFIER FITZPATRICK TYPE-4
2337    /// assert!(!emoji_modifier.contains('\u{200C}'));  // U+200C ZERO WIDTH NON-JOINER
2338    /// ```
2339
2340}
2341
2342make_binary_property! {
2343    name: "Emoji";
2344    short_name: "Emoji";
2345    ident: Emoji;
2346    data_marker: crate::provider::PropertyBinaryEmojiV1;
2347    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2348    /// Characters that are emoji.
2349    ///
2350    /// # Example
2351    ///
2352    /// ```
2353    /// use icu::properties::CodePointSetData;
2354    /// use icu::properties::props::Emoji;
2355    ///
2356    /// let emoji = CodePointSetData::new::<Emoji>();
2357    ///
2358    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
2359    /// assert!(!emoji.contains('V'));  // U+0056 LATIN CAPITAL LETTER V
2360    /// ```
2361
2362}
2363
2364make_binary_property! {
2365    name: "Emoji_Presentation";
2366    short_name: "EPres";
2367    ident: EmojiPresentation;
2368    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2369    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2370    /// Characters that have emoji presentation (rather than text presentation) by default.
2371    ///
2372    /// # Example
2373    ///
2374    /// ```
2375    /// use icu::properties::CodePointSetData;
2376    /// use icu::properties::props::EmojiPresentation;
2377    ///
2378    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2379    ///
2380    /// assert!(emoji_presentation.contains('🦬'));  // U+1F9AC BISON
2381    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2382    /// ```
2383
2384}
2385
2386make_binary_property! {
2387    name: "Extender";
2388    short_name: "Ext";
2389    ident: Extender;
2390    data_marker: crate::provider::PropertyBinaryExtenderV1;
2391    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2392    /// Characters, such as iteration and prolonged sound marks, whose principal function is to extend
2393    /// the value of a preceding alphabetic character or to extend the shape of adjacent characters.
2394    ///
2395    /// # Example
2396    ///
2397    /// ```
2398    /// use icu::properties::CodePointSetData;
2399    /// use icu::properties::props::Extender;
2400    ///
2401    /// let extender = CodePointSetData::new::<Extender>();
2402    ///
2403    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2404    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2405    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2406    /// ```
2407
2408}
2409
2410make_binary_property! {
2411    name: "Extended_Pictographic";
2412    short_name: "ExtPict";
2413    ident: ExtendedPictographic;
2414    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2415    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2416    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2417    /// emoji characters.
2418    ///
2419    /// # Example
2420    ///
2421    /// ```
2422    /// use icu::properties::CodePointSetData;
2423    /// use icu::properties::props::ExtendedPictographic;
2424    ///
2425    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2426    ///
2427    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2428    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2429    /// ```
2430
2431}
2432
2433make_binary_property! {
2434    name: "graph";
2435    short_name: "graph";
2436    ident: Graph;
2437    data_marker: crate::provider::PropertyBinaryGraphV1;
2438    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2439    /// Visible characters.
2440    ///
2441    /// This is defined for POSIX compatibility (the `graph` character class).
2442
2443}
2444
2445make_binary_property! {
2446    name: "Grapheme_Base";
2447    short_name: "Gr_Base";
2448    ident: GraphemeBase;
2449    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2450    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2451    /// Property used together with the definition of Standard Korean Syllable Block to define
2452    /// "Grapheme base".
2453    ///
2454    /// See definition D58 in Chapter 3, Conformance, of the Unicode Standard.
2455    ///
2456    /// # Example
2457    ///
2458    /// ```
2459    /// use icu::properties::CodePointSetData;
2460    /// use icu::properties::props::GraphemeBase;
2461    ///
2462    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2463    ///
2464    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2465    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2466    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2467    /// ```
2468
2469}
2470
2471make_binary_property! {
2472    name: "Grapheme_Extend";
2473    short_name: "Gr_Ext";
2474    ident: GraphemeExtend;
2475    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2476    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2477    /// Property used to define "Grapheme extender".
2478    ///
2479    /// See definition D59 in Chapter 3, Conformance, of the
2480    /// Unicode Standard.
2481    ///
2482    /// # Example
2483    ///
2484    /// ```
2485    /// use icu::properties::CodePointSetData;
2486    /// use icu::properties::props::GraphemeExtend;
2487    ///
2488    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2489    ///
2490    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2491    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2492    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2493    /// ```
2494
2495}
2496
2497make_binary_property! {
2498    name: "Grapheme_Link";
2499    short_name: "Gr_Link";
2500    ident: GraphemeLink;
2501    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2502    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2503    /// Deprecated Unicode property.
2504    ///
2505    /// Formerly proposed for programmatic determination of grapheme
2506    /// cluster boundaries.
2507}
2508
2509make_binary_property! {
2510    name: "Hex_Digit";
2511    short_name: "Hex";
2512    ident: HexDigit;
2513    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2514    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2515    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2516    /// compatibility equivalents.
2517    ///
2518    /// # Example
2519    ///
2520    /// ```
2521    /// use icu::properties::CodePointSetData;
2522    /// use icu::properties::props::HexDigit;
2523    ///
2524    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2525    ///
2526    /// assert!(hex_digit.contains('0'));  // U+0030 DIGIT ZERO
2527    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2528    /// assert!(hex_digit.contains('f'));  // U+0066 LATIN SMALL LETTER F
2529    /// assert!(hex_digit.contains('ｆ'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2530    /// assert!(hex_digit.contains('Ｆ'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2531    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2532    /// ```
2533}
2534
2535make_binary_property! {
2536    name: "Hyphen";
2537    short_name: "Hyphen";
2538    ident: Hyphen;
2539    data_marker: crate::provider::PropertyBinaryHyphenV1;
2540    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2541    /// Deprecated Unicode property.
2542    ///
2543    /// Dashes which are used to mark connections between pieces of
2544    /// words, plus the Katakana middle dot.
2545}
2546
2547make_binary_property! {
2548    name: "ID_Compat_Math_Continue";
2549    short_name: "ID_Compat_Math_Continue";
2550    ident: IdCompatMathContinue;
2551    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
2552    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
2553    /// Characters with the `ID_Compat_Math_Continue` property; see [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/).
2554}
2555
2556make_binary_property! {
2557    name: "ID_Compat_Math_Start";
2558    short_name: "ID_Compat_Math_Start";
2559    ident: IdCompatMathStart;
2560    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
2561    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
2562    /// Characters with the `ID_Compat_Math_Start` property; see [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/).
2563}
2564
2565make_binary_property! {
2566    name: "ID_Continue";
2567    short_name: "IDC";
2568    ident: IdContinue;
2569    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2570    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2571    /// Characters that can come after the first character in an identifier.
2572    ///
2573    /// If using NFKC to
2574    /// fold differences between characters, use [`XidContinue`] instead.  See
2575    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2576    /// more details.
2577    ///
2578    /// # Example
2579    ///
2580    /// ```
2581    /// use icu::properties::CodePointSetData;
2582    /// use icu::properties::props::IdContinue;
2583    ///
2584    /// let id_continue = CodePointSetData::new::<IdContinue>();
2585    ///
2586    /// assert!(id_continue.contains('x'));  // U+0078 LATIN SMALL LETTER X
2587    /// assert!(id_continue.contains('1'));  // U+0031 DIGIT ONE
2588    /// assert!(id_continue.contains('_'));  // U+005F LOW LINE
2589    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2590    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2591    /// assert!(id_continue.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2592    /// ```
2593}
2594
2595make_binary_property! {
2596    name: "Ideographic";
2597    short_name: "Ideo";
2598    ident: Ideographic;
2599    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2600    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2601    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2602    /// ideographs, or related siniform ideographs.
2603    ///
2604    /// # Example
2605    ///
2606    /// ```
2607    /// use icu::properties::CodePointSetData;
2608    /// use icu::properties::props::Ideographic;
2609    ///
2610    /// let ideographic = CodePointSetData::new::<Ideographic>();
2611    ///
2612    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2613    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2614    /// ```
2615}
2616
2617make_binary_property! {
2618    name: "ID_Start";
2619    short_name: "IDS";
2620    ident: IdStart;
2621    data_marker: crate::provider::PropertyBinaryIdStartV1;
2622    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2623    /// Characters that can begin an identifier.
2624    ///
2625    /// If using NFKC to fold differences between characters, use [`XidStart`] instead.
2626    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
2627    /// for more details.
2628    ///
2629    /// # Example
2630    ///
2631    /// ```
2632    /// use icu::properties::CodePointSetData;
2633    /// use icu::properties::props::IdStart;
2634    ///
2635    /// let id_start = CodePointSetData::new::<IdStart>();
2636    ///
2637    /// assert!(id_start.contains('x'));  // U+0078 LATIN SMALL LETTER X
2638    /// assert!(!id_start.contains('1'));  // U+0031 DIGIT ONE
2639    /// assert!(!id_start.contains('_'));  // U+005F LOW LINE
2640    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2641    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2642    /// assert!(id_start.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2643    /// ```
2644}
2645
2646make_binary_property! {
2647    name: "IDS_Binary_Operator";
2648    short_name: "IDSB";
2649    ident: IdsBinaryOperator;
2650    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2651    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2652    /// Binary operator characters used in Ideographic Description Sequences.
2653    ///
2654    /// # Example
2655    ///
2656    /// ```
2657    /// use icu::properties::CodePointSetData;
2658    /// use icu::properties::props::IdsBinaryOperator;
2659    ///
2660    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2661    ///
2662    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2663    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // U+3006 IDEOGRAPHIC CLOSING MARK
2664    /// ```
2665}
2666
2667make_binary_property! {
2668    name: "IDS_Trinary_Operator";
2669    short_name: "IDST";
2670    ident: IdsTrinaryOperator;
2671    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2672    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2673    /// Trinary operator characters used in Ideographic Description Sequences.
2674    ///
2675    /// # Example
2676    ///
2677    /// ```
2678    /// use icu::properties::CodePointSetData;
2679    /// use icu::properties::props::IdsTrinaryOperator;
2680    ///
2681    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2682    ///
2683    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // U+2FF2 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2684    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // U+2FF3 IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2685    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // U+2FF4 IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2686    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2687    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // U+3006 IDEOGRAPHIC CLOSING MARK
2688    /// ```
2689}
2690
2691make_binary_property! {
2692    name: "IDS_Unary_Operator";
2693    short_name: "IDSU";
2694    ident: IdsUnaryOperator;
2695    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
2696    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
2697    /// Unary operator characters used in Ideographic Description Sequences.
2698}
2699
2700make_binary_property! {
2701    name: "Join_Control";
2702    short_name: "Join_C";
2703    ident: JoinControl;
2704    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2705    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2706    /// Format control characters which have specific functions for control of cursive joining
2707    /// and ligation.
2708    ///
2709    /// # Example
2710    ///
2711    /// ```
2712    /// use icu::properties::CodePointSetData;
2713    /// use icu::properties::props::JoinControl;
2714    ///
2715    /// let join_control = CodePointSetData::new::<JoinControl>();
2716    ///
2717    /// assert!(join_control.contains('\u{200C}'));  // U+200C ZERO WIDTH NON-JOINER
2718    /// assert!(join_control.contains('\u{200D}'));  // U+200D ZERO WIDTH JOINER
2719    /// assert!(!join_control.contains('\u{200E}'));  // U+200E LEFT-TO-RIGHT MARK
2720    /// ```
2721}
2722
2723make_binary_property! {
2724    name: "Logical_Order_Exception";
2725    short_name: "LOE";
2726    ident: LogicalOrderException;
2727    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2728    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2729    /// A small number of spacing vowel letters in certain Southeast Asian scripts (such as Thai and Lao) that are stored in visual order.
2730    ///
2731    /// # Example
2732    ///
2733    /// ```
2734    /// use icu::properties::CodePointSetData;
2735    /// use icu::properties::props::LogicalOrderException;
2736    ///
2737    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2738    ///
2739    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2740    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2741    /// ```
2742}
2743
2744make_binary_property! {
2745    name: "Lowercase";
2746    short_name: "Lower";
2747    ident: Lowercase;
2748    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2749    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2750    /// Lowercase characters.
2751    ///
2752    /// # Example
2753    ///
2754    /// ```
2755    /// use icu::properties::CodePointSetData;
2756    /// use icu::properties::props::Lowercase;
2757    ///
2758    /// let lowercase = CodePointSetData::new::<Lowercase>();
2759    ///
2760    /// assert!(lowercase.contains('a'));  // U+0061 LATIN SMALL LETTER A
2761    /// assert!(!lowercase.contains('A'));  // U+0041 LATIN CAPITAL LETTER A
2762    /// ```
2763}
2764
2765make_binary_property! {
2766    name: "Math";
2767    short_name: "Math";
2768    ident: Math;
2769    data_marker: crate::provider::PropertyBinaryMathV1;
2770    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2771    /// Characters used in mathematical notation.
2772    ///
2773    /// # Example
2774    ///
2775    /// ```
2776    /// use icu::properties::CodePointSetData;
2777    /// use icu::properties::props::Math;
2778    ///
2779    /// let math = CodePointSetData::new::<Math>();
2780    ///
2781    /// assert!(math.contains('='));  // U+003D EQUALS SIGN
2782    /// assert!(math.contains('+'));  // U+002B PLUS SIGN
2783    /// assert!(!math.contains('-'));  // U+002D HYPHEN-MINUS
2784    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2785    /// assert!(!math.contains('/'));  // U+002F SOLIDUS
2786    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2787    /// ```
2788}
2789
2790make_binary_property! {
2791    name: "Modifier_Combining_Mark";
2792    short_name: "MCM";
2793    ident: ModifierCombiningMark;
2794    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
2795    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
2796    /// Characters with the Unicode `Modifier_Combining_Mark` property.
2797}
2798
2799make_binary_property! {
2800    name: "Noncharacter_Code_Point";
2801    short_name: "NChar";
2802    ident: NoncharacterCodePoint;
2803    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2804    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2805    /// Code points permanently reserved for internal use.
2806    ///
2807    /// # Example
2808    ///
2809    /// ```
2810    /// use icu::properties::CodePointSetData;
2811    /// use icu::properties::props::NoncharacterCodePoint;
2812    ///
2813    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2814    ///
2815    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));  // U+FDD0, a noncharacter
2816    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));  // U+FFFF, a noncharacter
2817    /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // U+10000 LINEAR B SYLLABLE B008 A
2818    /// ```
2819}
2820
2821make_binary_property! {
2822    name: "NFC_Inert";
2823    short_name: "nfcinert";
2824    ident: NfcInert;
2825    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2826    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2827    /// Characters that are inert under NFC (Normalization Form C), i.e., they do not interact with adjacent characters.
2828}
2829
2830make_binary_property! {
2831    name: "NFD_Inert";
2832    short_name: "nfdinert";
2833    ident: NfdInert;
2834    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2835    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2836    /// Characters that are inert under NFD (Normalization Form D), i.e., they do not interact with adjacent characters.
2837}
2838
2839make_binary_property! {
2840    name: "NFKC_Inert";
2841    short_name: "nfkcinert";
2842    ident: NfkcInert;
2843    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2844    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2845    /// Characters that are inert under NFKC (Normalization Form KC), i.e., they do not interact with adjacent characters.
2846}
2847
2848make_binary_property! {
2849    name: "NFKD_Inert";
2850    short_name: "nfkdinert";
2851    ident: NfkdInert;
2852    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2853    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2854    /// Characters that are inert under NFKD (Normalization Form KD), i.e., they do not interact with adjacent characters.
2855}
2856
2857make_binary_property! {
2858    name: "Pattern_Syntax";
2859    short_name: "Pat_Syn";
2860    ident: PatternSyntax;
2861    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2862    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2863    /// Characters used as syntax in patterns (such as regular expressions).
2864    ///
2865    /// See
2866    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2867    /// more details.
2868    ///
2869    /// # Example
2870    ///
2871    /// ```
2872    /// use icu::properties::CodePointSetData;
2873    /// use icu::properties::props::PatternSyntax;
2874    ///
2875    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2876    ///
2877    /// assert!(pattern_syntax.contains('{'));  // U+007B LEFT CURLY BRACKET
2878    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2879    /// assert!(!pattern_syntax.contains('0'));  // U+0030 DIGIT ZERO
2880    /// ```
2881}
2882
2883make_binary_property! {
2884    name: "Pattern_White_Space";
2885    short_name: "Pat_WS";
2886    ident: PatternWhiteSpace;
2887    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2888    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2889    /// Characters used as whitespace in patterns (such as regular expressions).
2890    ///
2891    /// See
2892    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2893    /// more details.
2894    ///
2895    /// # Example
2896    ///
2897    /// ```
2898    /// use icu::properties::CodePointSetData;
2899    /// use icu::properties::props::PatternWhiteSpace;
2900    ///
2901    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2902    ///
2903    /// assert!(pattern_white_space.contains(' '));  // U+0020 SPACE
2904    /// assert!(pattern_white_space.contains('\u{2029}'));  // U+2029 PARAGRAPH SEPARATOR
2905    /// assert!(pattern_white_space.contains('\u{000A}'));  // U+000A LINE FEED (new line)
2906    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // U+00A0 NO-BREAK SPACE
2907    /// ```
2908}
2909
2910make_binary_property! {
2911    name: "Prepended_Concatenation_Mark";
2912    short_name: "PCM";
2913    ident: PrependedConcatenationMark;
2914    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2915    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2916    /// A small class of visible format controls, which precede and then span a sequence of
2917    /// other characters, usually digits (for example, U+0600 ARABIC NUMBER SIGN).
2918}
2919
2920make_binary_property! {
2921    name: "print";
2922    short_name: "print";
2923    ident: Print;
2924    data_marker: crate::provider::PropertyBinaryPrintV1;
2925    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2926    /// Printable characters (visible characters and whitespace).
2927    ///
2928    /// This is defined for POSIX compatibility (the `print` character class).
2929}
2930
2931make_binary_property! {
2932    name: "Quotation_Mark";
2933    short_name: "QMark";
2934    ident: QuotationMark;
2935    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2936    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2937    /// Punctuation characters that function as quotation marks.
2938    ///
2939    /// # Example
2940    ///
2941    /// ```
2942    /// use icu::properties::CodePointSetData;
2943    /// use icu::properties::props::QuotationMark;
2944    ///
2945    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2946    ///
2947    /// assert!(quotation_mark.contains('\''));  // U+0027 APOSTROPHE
2948    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2949    /// assert!(!quotation_mark.contains('<'));  // U+003C LESS-THAN SIGN
2950    /// ```
2951}
2952
2953make_binary_property! {
2954    name: "Radical";
2955    short_name: "Radical";
2956    ident: Radical;
2957    data_marker: crate::provider::PropertyBinaryRadicalV1;
2958    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2959    /// CJK radical characters used in the definition of Ideographic Description Sequences.
2960    ///
2961    /// # Example
2962    ///
2963    /// ```
2964    /// use icu::properties::CodePointSetData;
2965    /// use icu::properties::props::Radical;
2966    ///
2967    /// let radical = CodePointSetData::new::<Radical>();
2968    ///
2969    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2970    /// assert!(!radical.contains('丹'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
2971    /// ```
2972}
2973
2974make_binary_property! {
2975    name: "Regional_Indicator";
2976    short_name: "RI";
2977    ident: RegionalIndicator;
2978    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2979    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2980    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2981    ///
2982    /// # Example
2983    ///
2984    /// ```
2985    /// use icu::properties::CodePointSetData;
2986    /// use icu::properties::props::RegionalIndicator;
2987    ///
2988    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2989    ///
2990    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2991    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2992    /// assert!(!regional_indicator.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2993    /// ```
2994}
2995
2996make_binary_property! {
2997    name: "Soft_Dotted";
2998    short_name: "SD";
2999    ident: SoftDotted;
3000    data_marker: crate::provider::PropertyBinarySoftDottedV1;
3001    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
3002    /// Characters with a "soft dot", like `i` or `j`.
3003    ///
3004    /// An accent placed on these characters causes
3005    /// the dot to disappear.
3006    ///
3007    /// # Example
3008    ///
3009    /// ```
3010    /// use icu::properties::CodePointSetData;
3011    /// use icu::properties::props::SoftDotted;
3012    ///
3013    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
3014    ///
3015    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
3016    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
3017    /// ```
3018}
3019
3020make_binary_property! {
3021    name: "Segment_Starter";
3022    short_name: "segstart";
3023    ident: SegmentStarter;
3024    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
3025    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
3026    /// ICU-specific property for characters that are starters in terms of Unicode normalization
3027    /// and combining character sequences.
3028}
3029
3030make_binary_property! {
3031    name: "Case_Sensitive";
3032    short_name: "Sensitive";
3033    ident: CaseSensitive;
3034    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
3035    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
3036    /// Characters that are either the source of a case mapping or in the target of a case
3037    /// mapping. Not the same as the `Cased_Letter` general category.
3038}
3039
make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
    /// Punctuation characters that generally mark the end of sentences.
    ///
    /// Unlike [`TerminalPunctuation`], this set does not contain the comma (see the
    /// example below).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(','));
    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
    /// ```
}
3063
make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
    /// Punctuation characters that generally mark the end of textual units.
    ///
    /// Unlike [`SentenceTerminal`], this set contains the comma (see the example below).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(','));
    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
    /// ```
}
3087
make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// // CJK unified ideographs are members...
    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// // ...whereas a Nushu character is not.
    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
    /// ```
}
3109
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Uppercase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));  // U+0055 LATIN CAPITAL LETTER U
    /// assert!(!uppercase.contains('u'));  // U+0075 LATIN SMALL LETTER U
    /// ```
}
3130
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that are Variation Selectors.
    ///
    /// As the example shows, similarly named characters such as
    /// U+303E IDEOGRAPHIC VARIATION INDICATOR are not included.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
    /// ```
}
3154
make_binary_property! {
    name: "White_Space";
    short_name: "WSpace";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// Note that U+200B ZERO WIDTH SPACE is not included, despite its name (see the
    /// example below).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));
    /// assert!(white_space.contains('\u{000A}'));  // LINE FEED (NEW LINE)
    /// assert!(white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}'));  // ZERO WIDTH SPACE
    /// ```
}
3178
// Binary property `xdigit` (long and short name are identical), declared under the
// identifier `Xdigit`.
// NOTE(review): unlike most sibling invocations in this file, no `# Example` doctest is
// provided here.
make_binary_property! {
    name: "xdigit";
    short_name: "xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.
}
3189
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3217
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// Unlike [`XidContinue`], this set does not contain the digit `1` or `_` (see the
    /// example below).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3246
3247pub use crate::emoji::EmojiSet;
3248
/// Declares a public, non-exhaustive unit struct and implements [`EmojiSet`] for it.
///
/// The invocation supplies, in this order: the long property name, the short property
/// name, the identifier of the struct to declare, the data marker type, the name of the
/// baked singleton constant, and at least one attribute (normally the doc comment) that
/// is attached to the generated struct.
macro_rules! make_emoji_set {
    (
        name: $long_name:literal;
        short_name: $abbreviation:literal;
        ident: $set_ty:ident;
        data_marker: $marker_ty:ty;
        singleton: $baked_const:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $set_ty;

        // `Sealed` is implemented alongside `EmojiSet` for every generated type.
        impl crate::private::Sealed for $set_ty {}

        impl EmojiSet for $set_ty {
            type DataMarker = $marker_ty;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbreviation.as_bytes();
            // The baked data only exists when the `compiled_data` feature is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked_const;
        }
    };
}
3275
make_emoji_set! {
    name: "Basic_Emoji";
    short_name: "Basic_Emoji";
    ident: BasicEmoji;
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}'));  // U+0020 SPACE
    /// assert!(!basic_emoji.contains('\n'));
    /// assert!(basic_emoji.contains('🦃'));  // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}"));  // the same character, queried as a string
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}"));  // U+1F6E4 RAILWAY TRACK + U+FE0F VARIATION SELECTOR-16
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3303
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values named in a property's name-parse data are exactly the
    /// values declared as constants on the Rust type `T`.
    ///
    /// `lookup` is the name-to-value map taken from data; `consts` yields the declared
    /// constants (the tests pass `T::ALL_VALUES`).
    ///
    /// # Panics
    ///
    /// Panics if the two sets differ. The message lists one line per mismatch, tagged
    /// `Data` for a value found only in the data and `Consts` for a value found only among
    /// the constants.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Start with everything the data knows, keyed by numeric value; an entry is
        // crossed off as soon as the matching constant is seen.
        let mut only_in_data = BTreeMap::new();
        for (name, value) in lookup.map.iter() {
            only_in_data.insert(value, (name, "Data"));
        }

        let long_names = crate::PropertyNamesLong::<T>::new();

        let mut diff = Vec::new();
        for constant in consts {
            let key = u16::from(*constant) as usize;
            let label = long_names
                .get(*constant)
                .unwrap_or("<unknown>")
                .to_string();
            if only_in_data.remove(&key).is_none() {
                // The constant has no counterpart in the data.
                diff.push((key, (label, "Consts")));
            }
        }
        // Whatever was never crossed off exists in the data but has no constant.
        diff.extend(only_in_data);

        let fmt_diff: String = diff
            .iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    /// Generates one `#[test]` function per table row. Each generated test passes the
    /// named baked name-parse singleton and the property type's `ALL_VALUES` to
    /// [`check_enum`].
    macro_rules! completeness_tests {
        ($($test_fn:ident: $singleton:ident => $prop:ident;)+) => {
            $(
                #[test]
                fn $test_fn() {
                    check_enum(
                        crate::provider::Baked::$singleton,
                        $prop::ALL_VALUES,
                    );
                }
            )+
        };
    }

    completeness_tests! {
        test_ea: SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1 => EastAsianWidth;
        test_ccc: SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1 => CanonicalCombiningClass;
        test_jt: SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1 => JoiningType;
        test_insc: SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1 => IndicSyllabicCategory;
        test_sb: SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1 => SentenceBreak;
        test_wb: SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1 => WordBreak;
        test_bc: SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1 => BidiClass;
        test_hst: SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1 => HangulSyllableType;
        test_vo: SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1 => VerticalOrientation;
    }
}
// Source: icu_properties/props.rs