unicode_properties/
lib.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Query character Unicode properties according to
12//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
13//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
14//! rules.
15//!
16//! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`.
17//!
18//! Future properties can be added as requested.
19//!
20//! ```rust
21//! use unicode_properties::UnicodeEmoji;
22//! use unicode_properties::UnicodeGeneralCategory;
23//!
24//! let ch = '🦀'; // U+1F980 CRAB
25//! let is_emoji = ch.is_emoji_char();
26//! let group = ch.general_category_group();
27//! println!("{}({:?})", ch, group);
28//! println!("The above char {} for use as emoji char.",
29//!          if is_emoji { "is recommended" } else { "is not recommended" });
30//! ```
31//!
32//! # Features
33//!
34//! ## `general-category`
35//!
36//! Provides the most general classification of a character,
37//! based on its primary characteristic.
38//!
39//! ## `emoji`
40//!
41//! Provides the emoji character properties of a character.
42//!
43#![no_std]
44#![deny(missing_docs)]
45
46#[rustfmt::skip]
47mod tables;
48
49#[cfg(feature = "emoji")]
50/// Query the emoji character properties of a character.
51pub mod emoji {
52    pub use crate::tables::emoji::EmojiStatus;
53
54    /// Query the emoji character properties of a character.
55    pub trait UnicodeEmoji: Sized {
56        /// Returns the emoji character properties in a status enum.
57        fn emoji_status(self) -> EmojiStatus;
58
59        /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
60        #[allow(clippy::wrong_self_convention)]
61        fn is_emoji_char(self) -> bool {
62            crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status())
63        }
64
65        /// Checks whether this character are used in emoji sequences where they're not
66        /// intended for independent, direct input, i.e. `Emoji_Component=YES`.
67        #[allow(clippy::wrong_self_convention)]
68        fn is_emoji_component(self) -> bool {
69            crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status())
70        }
71
72        /// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES`
73        #[allow(clippy::wrong_self_convention)]
74        fn is_emoji_char_or_emoji_component(self) -> bool {
75            crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(
76                self.emoji_status(),
77            )
78        }
79    }
80
81    impl UnicodeEmoji for char {
82        fn emoji_status(self) -> EmojiStatus {
83            crate::tables::emoji::emoji_status(self)
84        }
85    }
86
87    #[inline]
88    /// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
89    ///
90    /// It can be used between the elements of a sequence of characters to indicate that
91    /// a single glyph should be presented if available.
92    pub fn is_zwj(c: char) -> bool {
93        c == '\u{200D}'
94    }
95
96    #[inline]
97    /// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
98    /// request an emoji presentation for an emoji character.
99    pub fn is_emoji_presentation_selector(c: char) -> bool {
100        c == '\u{FE0F}'
101    }
102
103    #[inline]
104    /// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
105    /// request a text presentation for an emoji character.
106    pub fn is_text_presentation_selector(c: char) -> bool {
107        c == '\u{FE0E}'
108    }
109
110    #[inline]
111    /// Checks whether this character is one of the Regional Indicator characters.
112    ///
113    /// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
114    pub fn is_regional_indicator(c: char) -> bool {
115        matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
116    }
117
118    #[inline]
119    /// Checks whether this character is one of the Tag Characters.
120    ///
121    /// These can be used in indicating variants or extensions of emoji characters.
122    pub fn is_tag_character(c: char) -> bool {
123        matches!(c, '\u{E0020}'..='\u{E007F}')
124    }
125}
126
127#[cfg(feature = "general-category")]
128/// Query the general category property of a character.
129pub mod general_category {
130    pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};
131
132    /// Query the general category property of a character.
133    ///
134    /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
135    pub trait UnicodeGeneralCategory: Sized {
136        /// Queries the most general classification of a character.
137        fn general_category(self) -> GeneralCategory;
138
139        /// Queries the grouping of the most general classification of a character.
140        fn general_category_group(self) -> GeneralCategoryGroup {
141            crate::tables::general_category::general_category_group(self.general_category())
142        }
143
144        /// Queries whether the most general classification of a character belongs to the `LetterCased` group
145        ///
146        /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
147        /// categories, and is a subset of the `Letter` group.
148        #[allow(clippy::wrong_self_convention)]
149        fn is_letter_cased(self) -> bool {
150            crate::tables::general_category::general_category_is_letter_cased(
151                self.general_category(),
152            )
153        }
154    }
155
156    impl UnicodeGeneralCategory for char {
157        fn general_category(self) -> GeneralCategory {
158            crate::tables::general_category::general_category_of_char(self)
159        }
160    }
161}
162
163pub use tables::UNICODE_VERSION;
164
165#[cfg(feature = "emoji")]
166#[doc(inline)]
167pub use emoji::UnicodeEmoji;
168
169#[cfg(feature = "emoji")]
170#[doc(inline)]
171pub use emoji::EmojiStatus;
172
173#[cfg(feature = "general-category")]
174#[doc(inline)]
175pub use general_category::GeneralCategory;
176
177#[cfg(feature = "general-category")]
178#[doc(inline)]
179pub use general_category::GeneralCategoryGroup;
180
181#[cfg(feature = "general-category")]
182#[doc(inline)]
183pub use general_category::UnicodeGeneralCategory;