unicode_properties/lib.rs
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Query character Unicode properties according to
//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
//! rules.
//!
//! Currently we support the `General_Category` property as well as `Emoji` and `Emoji_Component`.
//!
//! Future properties can be added as requested.
//!
//! ```rust
//! use unicode_properties::UnicodeEmoji;
//! use unicode_properties::UnicodeGeneralCategory;
//!
//! let ch = '🦀'; // U+1F980 CRAB
//! let is_emoji = ch.is_emoji_char();
//! let group = ch.general_category_group();
//! println!("{}({:?})", ch, group);
//! println!("The above char {} for use as emoji char.",
//!          if is_emoji { "is recommended" } else { "is not recommended" });
//! ```
//!
//! # Features
//!
//! ## `general-category`
//!
//! Provides the most general classification of a character,
//! based on its primary characteristic.
//!
//! ## `emoji`
//!
//! Provides the emoji character properties of a character.
//!
43#![no_std]
44#![deny(missing_docs)]
45
46#[rustfmt::skip]
47mod tables;
48
#[cfg(feature = "emoji")]
/// Query the emoji character properties of a character.
///
/// The [`UnicodeEmoji`] trait answers property queries through the crate's
/// `tables::emoji` module, while the free functions in this module recognise
/// individual code points and code point ranges by direct comparison.
pub mod emoji {
    pub use crate::tables::emoji::EmojiStatus;

    /// Query the emoji character properties of a character.
    ///
    /// Implementors only have to supply `emoji_status`; every `is_*` query is a
    /// provided method that classifies the status returned by `emoji_status`.
    pub trait UnicodeEmoji: Sized {
        /// Returns the emoji character properties in a status enum.
        fn emoji_status(self) -> EmojiStatus;

        /// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
        // The `is_*` methods of this trait take `self` by value, which clippy's
        // `wrong_self_convention` lint would otherwise flag; the same reason
        // applies to the other `#[allow]`s in this trait.
        #[allow(clippy::wrong_self_convention)]
        fn is_emoji_char(self) -> bool {
            crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status())
        }

        /// Checks whether this character is used in emoji sequences where it is not
        /// intended for independent, direct input, i.e. `Emoji_Component=YES`.
        #[allow(clippy::wrong_self_convention)]
        fn is_emoji_component(self) -> bool {
            crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status())
        }

        /// Checks whether this character occurs in emoji sequences,
        /// i.e. `Emoji=YES | Emoji_Component=YES`.
        #[allow(clippy::wrong_self_convention)]
        fn is_emoji_char_or_emoji_component(self) -> bool {
            crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(
                self.emoji_status(),
            )
        }
    }

    impl UnicodeEmoji for char {
        // Thin forwarding wrapper; hinted `#[inline]` like the free predicates
        // in this module so the extra call layer can be removed across crates.
        #[inline]
        fn emoji_status(self) -> EmojiStatus {
            crate::tables::emoji::emoji_status(self)
        }
    }

    /// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
    ///
    /// It can be used between the elements of a sequence of characters to indicate that
    /// a single glyph should be presented if available.
    #[inline]
    pub fn is_zwj(c: char) -> bool {
        c == '\u{200D}'
    }

    /// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
    /// request an emoji presentation for an emoji character.
    #[inline]
    pub fn is_emoji_presentation_selector(c: char) -> bool {
        c == '\u{FE0F}'
    }

    /// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
    /// request a text presentation for an emoji character.
    #[inline]
    pub fn is_text_presentation_selector(c: char) -> bool {
        c == '\u{FE0E}'
    }

    /// Checks whether this character is one of the Regional Indicator characters,
    /// i.e. lies in the inclusive range U+1F1E6..=U+1F1FF.
    ///
    /// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
    /// This function only classifies a single character; it does not check pairing.
    #[inline]
    pub fn is_regional_indicator(c: char) -> bool {
        matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
    }

    /// Checks whether this character is one of the Tag Characters,
    /// i.e. lies in the inclusive range U+E0020..=U+E007F.
    ///
    /// These can be used in indicating variants or extensions of emoji characters.
    #[inline]
    pub fn is_tag_character(c: char) -> bool {
        matches!(c, '\u{E0020}'..='\u{E007F}')
    }
}
126
#[cfg(feature = "general-category")]
/// Query the general category property of a character.
///
/// The [`UnicodeGeneralCategory`] trait answers its queries through the
/// crate's `tables::general_category` module, whose `GeneralCategory` and
/// `GeneralCategoryGroup` types are re-exported here.
pub mod general_category {
    pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};

    /// Query the general category property of a character.
    ///
    /// Implementors only have to supply `general_category`; the remaining
    /// methods are provided and classify the category it returns.
    ///
    /// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
    pub trait UnicodeGeneralCategory: Sized {
        /// Queries the most general classification of a character.
        fn general_category(self) -> GeneralCategory;

        /// Queries the grouping of the most general classification of a character.
        fn general_category_group(self) -> GeneralCategoryGroup {
            crate::tables::general_category::general_category_group(self.general_category())
        }

        /// Queries whether the most general classification of a character belongs to the
        /// `LetterCased` group.
        ///
        /// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
        /// categories, and is a subset of the `Letter` group.
        // This `is_*` method takes `self` by value, which clippy's
        // `wrong_self_convention` lint would otherwise flag.
        #[allow(clippy::wrong_self_convention)]
        fn is_letter_cased(self) -> bool {
            crate::tables::general_category::general_category_is_letter_cased(
                self.general_category(),
            )
        }
    }

    impl UnicodeGeneralCategory for char {
        // Thin forwarding wrapper around the table lookup; hinted `#[inline]`
        // so the extra call layer can be removed across crates.
        #[inline]
        fn general_category(self) -> GeneralCategory {
            crate::tables::general_category::general_category_of_char(self)
        }
    }
}
162
163pub use tables::UNICODE_VERSION;
164
165#[cfg(feature = "emoji")]
166#[doc(inline)]
167pub use emoji::UnicodeEmoji;
168
169#[cfg(feature = "emoji")]
170#[doc(inline)]
171pub use emoji::EmojiStatus;
172
173#[cfg(feature = "general-category")]
174#[doc(inline)]
175pub use general_category::GeneralCategory;
176
177#[cfg(feature = "general-category")]
178#[doc(inline)]
179pub use general_category::GeneralCategoryGroup;
180
181#[cfg(feature = "general-category")]
182#[doc(inline)]
183pub use general_category::UnicodeGeneralCategory;